跟着老师学Java

矩阵分解

1、BufferedReader 读取文件中数据到缓冲区

BufferedReader tempBufferReader = new BufferedReader(new FileReader(tempFile));

Buffer:表示缓冲区。缓冲区中的内容可以更改;BufferedReader 先把数据成批读入内存缓冲区,之后的读取直接从缓冲区中取数据,减少了直接访问文件的次数,从而提高访问效率。

如果想接收任意长度的数据,而且避免乱码的产生,就可以使用BufferedReader。

通过 readLine() 逐行读取缓冲区中的数据(读到文件末尾时 readLine() 返回 null,使用前应先判断)。读完后调用 tempBufferReader.close() 关闭。

for (int i = 0; i < paraNumRatings; i++) {
	tempString = tempBufferReader.readLine();
	// readLine() returns null once the end of the file is reached, so a file
	// with fewer than paraNumRatings lines must be reported, not dereferenced.
	if (tempString == null) {
		throw new IOException("Expected " + paraNumRatings
				+ " ratings but the file ended after " + i + " lines.");
	}// Of if
	tempStringArray = tempString.split(",");
	// Each line is "user,item,rating"; the rating is parsed as a double so that
	// fractional ratings such as 3.5 are accepted as well as whole numbers.
	dataset[i] = new Triple(Integer.parseInt(tempStringArray[0]),
			Integer.parseInt(tempStringArray[1]), Double.parseDouble(tempStringArray[2]));
}// Of for i

1 代码

package machinelearning.recommendersystem;

 
import java.io.*;
import java.util.Random;

/*
 * Matrix factorization for recommender systems.
 */

public class MatrixFactorization {
	/**
	 * Used to generate random numbers.
	 */
	Random rand = new Random();
	
	/**
	 * Number of users.
	 */
	int numUsers;
	
	/**
	 * Number of items.
	 */
	int numItems;
	
	/**
	 * Number of ratings.
	 */
	int numRatings;
	
	/**
	 * Train data;
	 */
	Triple[] dataset;
	
	/**
	 * A parameter for controlling learning regular.
	 */
	double alpha;
	
	/**
	 * A parameter for controlling learning speed. 
	 */
	double lambda;
	
	/**
	 * The low rank of the small matrices.
	 */
	int rank;
	
	/**
	 * The user matrix U.
	 */
	double[][] userSubspace;
	
	/**
	 * The item matrix V.
	 */
	double[][] itemSubspace;
	
	/**
	 * The lower bound of the rating value.
	 */
	double ratingLowerBound;
	
	/**
	 * The upper bound of the rating value.
	 */
	double ratingUpperBound;
	
	/**
	 * *********************
	 * First constructor.
	 * 
	 * @param paraFilename.
	 * 				The data filename.
	 * @param paraNumUsers.
	 *              The number of users.
	 * @param paraNumItem.
	 *              The number of items.
	 * @param paraNumRatings
	 *              The number of ratings.
	 * @param paraRatingLowerBound
	 *              The lower bound of the rating value.
	 * @param paraRatingUpperBound 
	 *              The upper bound of the rating value.             
	 * ********************
	 */
	public MatrixFactorization(String paraFilename, int paraNumUsers, int paraNumItem, 
			int paraNumRatings, double paraRatingLowerBound, double paraRatingUpperBound) {
		numUsers = paraNumUsers;
		numItems = paraNumItem;
		numRatings = paraNumRatings;
		ratingLowerBound = paraRatingLowerBound;
	    ratingUpperBound = paraRatingUpperBound;
	    
	    try {
	    	readData(paraFilename, paraNumUsers, paraNumItem, paraNumRatings);
	    } catch (Exception ee) {
	    	System.out.println("File " + paraFilename + " cannot be read! " + ee);
	    	System.exit(0);
	    }// Of try
	}// Of the first constructor
	
	/**
	 * ***************************
	 * Set parameters.
	 * 
	 * @param paraRank
	 *  			The given rank.
	 * @throws IOException.
	 * ***************************
	 */
	public void setParameters(int paraRank, double paraAlpha, double paraLambda) {
		rank = paraRank;
		alpha = paraAlpha;
		lambda = paraLambda;	
	}// Of setParameters
	
	/**
	 * ****************************
	 * Read the data from the file.
	 * 
	 * @param paraFilename.
	 *      		The given file.
	 * @throws IOException.
	 * ****************************
	 */
	public void readData(String paraFilename, int paraNumUsers, int paraNumItem, 
			int paraNumRatings) throws IOException {
		File tempFile = new File(paraFilename);
		if (!tempFile.exists()) {
			System.out.println("File " + paraFilename + " does not exists.");
			System.exit(0);
		}// Of if
		BufferedReader tempBufferReader = new BufferedReader(new FileReader(tempFile));
		
		// Allocate space.
		dataset = new Triple[paraNumRatings];
		String tempString;
		String[] tempStringArray;
		for (int i = 0; i < paraNumRatings; i++) {
			tempString = tempBufferReader.readLine();
			tempStringArray = tempString.split(",");
			dataset[i] = new Triple(Integer.parseInt(tempStringArray[0]),
					Integer.parseInt(tempStringArray[1]),Integer.parseInt(tempStringArray[2]));	
		}// Of for i
		
		tempBufferReader.close();
	}// Of readData
	
	/**
	 * ***************************
	 * Initialize subspace. Each value is in [0, 1].
	 * ***************************
	 */
	void initializeSubspace() {
		userSubspace = new double[numUsers][rank];
		
		for(int i = 0; i < numUsers; i++) {
			for(int j = 0; j < rank; j++) {
				userSubspace[i][j] = rand.nextDouble();
			}// Of for i
		}// Of for j
		
        itemSubspace = new double[numItems][rank];
		
		for(int i = 0; i < numItems; i++) {
			for(int j = 0; j < rank; j++) {
				itemSubspace[i][j] = rand.nextDouble();
			}// Of for i
		}// Of for j
	}// Of initializeSubspace
	
	/**
	 * ***********************
	 *  Predict the rating of the user to the item
	 *  
	 *  @param paraUser
	 *         		The user index.
	 *  ***********************
	 */
	public double predict(int paraUser, int paraItem) {
		double resultValue = 0;
		for (int i = 0; i < rank; i++) {
			resultValue += userSubspace[paraUser][i] * itemSubspace[paraItem][i];
		}// Of for i
		return resultValue;
	}// Of predict
	
	/**
	 * **********************
	 * Train.
	 * 
	 * @param paraRounds
	 *           	The number of rounds.
	 * **********************          
	 */
	public void train(int paraRounds) {
		initializeSubspace();
		
		for (int i = 0; i < paraRounds; i++) {
			updateNoRegular();
			if (i % 50 == 0) {
				System.out.println("Round" + i);
				System.out.println("MAE:" + mae());
			}// Of if	
		}// Of for i	
	}// Of train
	
	 /**
	 * **************************
	 * Update sub-spaces using the train data.
	 * **************************
	 */
	public void updateNoRegular() {
		for(int i = 0; i < numRatings; i++) {
			int tempUserId = dataset[i].user;
			int tempItemId = dataset[i].item;
			double tempRate = dataset[i].rating;
			
			double tempResidual = tempRate - predict(tempUserId,tempItemId);
			double tempValue = 0;
			
			for (int j = 0; j < rank; j++) {
				tempValue = 2 * tempResidual * itemSubspace[tempItemId][j];
				userSubspace[tempUserId][j] += alpha * tempValue;
			}// Of for j
			
			for (int j = 0; j < rank; j++) {
				tempValue = 2 * tempResidual * userSubspace[tempUserId][j];
				itemSubspace[tempItemId][j] += alpha * tempValue;
			}// Of for j		
		}// Of for i
	}// Of updateNoRegular
	
	/**
	 * **************************
	 * Update sub-spaces using the train data.
	 * **************************
	 */
	public void updateRegular() {
		for(int i = 0; i < numRatings; i++) {
			int tempUserId = dataset[i].user;
			int tempItemId = dataset[i].item;
			double tempRate = dataset[i].rating;
			
			double tempResidual = tempRate - predict(tempUserId,tempItemId);
			double tempValue = 0;
			
			for (int j = 0; j < rank; j++) {
				tempValue = 2 * tempResidual * itemSubspace[tempItemId][j] - lambda * userSubspace[tempUserId][j];
				userSubspace[tempUserId][j] += alpha * tempValue;
			}// Of for j
			
			for (int j = 0; j < rank; j++) {
				tempValue = 2 * tempResidual * userSubspace[tempUserId][j] - lambda * itemSubspace[tempItemId][j];
				itemSubspace[tempItemId][j] += alpha * tempValue;
			}// Of for j		
		}// Of for i
	}// Of updateRegular
	
	/**
	 * **************************
	 * Compute the RSME.
	 * 
	 * @return RMSE of the current factorization.
	 * **************************
	 */
	public double rsme() {
		double resultRsme = 0;
		int tempTestCount = 0;
		
		for (int i = 0; i < numRatings; i++) {
			int tempUserId = dataset[i].user;
			int tempItemId = dataset[i].item;
			double tempRate = dataset[i].rating;
			
			double tempPrediction = predict(tempUserId,tempItemId);
			
			if(tempPrediction < ratingLowerBound) {
				tempPrediction = ratingLowerBound;
			} else if (tempPrediction > ratingUpperBound) {
				tempPrediction = ratingUpperBound;
			}// Of if
			
			double tempError = tempRate - tempPrediction;
			resultRsme += tempError * tempError;
			tempTestCount++;
		}// Of for i
		
		return Math.sqrt(resultRsme / tempTestCount);
	}// Of rsme
	
	/**
	 * *************************
	 * Compute the MAE.
	 * 
	 * @return MAE of the current factorization.
	 * *************************
	 */
	public double mae() {
		double resultMae = 0;
		double resultCount = 0;
		
		for (int i = 0; i < numRatings; i++) {
			int tempUserId = dataset[i].user;
			int tempItemId = dataset[i].item;
			double tempRate = dataset[i].rating;
			
			double tempPrediction = predict(tempUserId,tempItemId);
			
			if(tempPrediction < ratingLowerBound) {
				tempPrediction = ratingLowerBound;
			} else if (tempPrediction > ratingUpperBound) {
				tempPrediction = ratingUpperBound;
			}// Of if
			
			double tempError = tempRate - tempPrediction;
			
			resultMae += Math.abs(tempError);
			resultCount++;
		}// Of for i
		
		return (resultMae / resultCount);
	}// Of mae
	
	public static void testTrainingTesting(String paraFilename, int paraNumUsers, int paraNumItems,
			int paraNumRatings, double paraRatingLowerBound, double paraRatingUpperBound,
			int paraRounds) {
		try {
			//Step 1. reading the training and testing data
			MatrixFactorization tempMF = new MatrixFactorization(paraFilename, paraNumUsers, paraNumItems,
					paraNumRatings, paraRatingLowerBound, paraRatingUpperBound);
			
			tempMF.setParameters(5, 0.0001, 0.005);
			
			//Step 2. update and predict
			System.out.println("Begin training! ! !");
			tempMF.train(paraRounds);
			
			//Step 1. compute the mse
			double tempMAE = tempMF.mae();
			double tempRSME = tempMF.rsme();
			System.out.println("Finally, MAE = " + tempMAE + ", RSME = " + tempRSME);
		} catch (Exception e) {
			e.printStackTrace();
		}// Of try
	}// Of testTrainingTesting
	
	/**
	 * ******************
	 * The entrance of the program.
	 * 
	 * @param args Not used now.
	 * ******************
	 */
	public static void main(String args[]) {
		testTrainingTesting("E:\\Java\\Java_sampledata\\movielens-943u1682m.txt",943, 1682, 10000, 1, 5, 2000);
	}
	
	public class Triple {
		public int user;
		public int item;
		public double rating;
		
		/**
		 * ******************
		 * The constructor.
		 * ******************
		 */
		public Triple() {
			user = -1;
			item = -1;
			rating = -1;
		}// Of the first constructor
		
		/**
		 * ********************
		 * The constructor.
		 * ********************
		 */
		public Triple(int paraUser, int paraItem, double paraRating) {
			user = paraUser;
			item = paraItem;
			rating = paraRating;
		}// Of the first constructor
		
		/**
		 * ********************
		 * Show me.
		 * ********************
		 */
		public String toString() {
			return "" + user + ", " + item + ", " + rating;
		}// Of toString 
	}// Of class Triple

}// Of class MatrixFactorization

2 运行结果

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值