Day 55

一、学习内容

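昨天实现的是 item-based recommendation，今天自己来实现一下 user-based recommendation，只需要在原有基础上增加即可。两者的区别在于邻居的选取：预测用户 u 对项目 i 的评分时，user-based 的邻居是同样给项目 i 打过分、且平均评分与 u 的平均评分之差小于半径的其他用户，预测值取这些邻居对项目 i 评分的平均值。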

package machinelearning.knn;

/**
 * Recommendation with M-distance.
 * @author Fan Min minfanphd@163.com.
 */

import java.io.*;
/**
 * User-based recommendation with M-distance.
 *
 * <p>
 * Ratings are held as (user, item, rating) triples. To predict the rating a
 * user gave to an item, the rating is left out, the user's average is
 * recomputed without it, and the prediction is the mean rating that
 * "neighbor" users gave to the same item. A neighbor is another user who
 * rated that item and whose average rating differs from the recomputed
 * average by less than {@code radius}.
 */
public class MBR {
	/**
	 * Prediction used when a rating has no neighbors. The original note on this
	 * constant states that ratings range from 1 to 5.
	 */
	public static final double DEFAULT_RATING = 3.0;

	/**
	 * Number of users (valid user indices are 0 .. numUsers - 1).
	 */
	private int numUsers;

	/**
	 * Number of items (valid item indices are 0 .. numItems - 1).
	 */
	private int numItems;

	/**
	 * Number of ratings actually read from the file.
	 */
	private int numRatings;

	/**
	 * predictions[i] is the leave-one-out prediction for rating i.
	 */
	private double[] predictions;

	/**
	 * The compressed rating matrix: row i is {user, item, rating}.
	 */
	private int[][] compressedRatingMatrix;

	/**
	 * userDegrees[u] is the number of ratings given by user u.
	 */
	private int[] userDegrees;

	/**
	 * userAverageRatings[u] is the mean rating given by user u (NaN when the
	 * user has no ratings).
	 */
	private double[] userAverageRatings;

	/**
	 * itemDegrees[i] is the number of ratings received by item i.
	 */
	private int[] itemDegrees;

	/**
	 * itemAverageRatings[i] is the mean rating received by item i (NaN when the
	 * item has no ratings).
	 */
	private double[] itemAverageRatings;

	/**
	 * Offsets into {@link #itemRatingIndices}: the ratings of item i occupy
	 * positions itemStartingIndices[i] (inclusive) to itemStartingIndices[i + 1]
	 * (exclusive).
	 */
	private int[] itemStartingIndices;

	/**
	 * Row numbers of {@link #compressedRatingMatrix} grouped by item, so that
	 * all users who rated one item can be visited without scanning every
	 * rating.
	 */
	private int[] itemRatingIndices;

	/**
	 * Number of ratings for which the last prediction run found no neighbor.
	 */
	private int numNonNeighbors;

	/**
	 * The radius (delta) that decides whether two users are neighbors.
	 */
	private double radius;

	/**
	 *************************
	 * Build the recommender from a rating file. Each non-blank line of the file
	 * must have the form {@code user,item,rating} with integer values.
	 *
	 * @param paraFilename
	 *            the rating filename.
	 * @param paraNumUsers
	 *            number of users; every user index must be smaller than this.
	 * @param paraNumItems
	 *            number of items; every item index must be smaller than this.
	 * @param paraNumRatings
	 *            maximal number of ratings. When the file holds fewer lines,
	 *            only the lines actually read are used.
	 * @throws FileNotFoundException
	 *             if the file does not exist.
	 * @throws IllegalArgumentException
	 *             if a line is malformed, an index is out of range, or the file
	 *             holds more than paraNumRatings ratings.
	 * @throws Exception
	 *             if reading the file fails.
	 *************************
	 */
	public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
		numItems = paraNumItems;
		numUsers = paraNumUsers;
		numRatings = paraNumRatings;

		userDegrees = new int[numUsers];
		userAverageRatings = new double[numUsers];
		itemDegrees = new int[numItems];
		itemAverageRatings = new double[numItems];
		compressedRatingMatrix = new int[numRatings][3];

		System.out.println("Reading " + paraFilename);

		int tempRead = readRatings(paraFilename);

		// Drop the unused tail: leftover all-zero rows would otherwise be
		// counted as ratings of value 0 given by user 0 to item 0.
		if (tempRead < numRatings) {
			int[][] tempTrimmed = new int[tempRead][];
			System.arraycopy(compressedRatingMatrix, 0, tempTrimmed, 0, tempRead);
			compressedRatingMatrix = tempTrimmed;
			numRatings = tempRead;
		}
		predictions = new double[numRatings];

		computeAverages();
		buildItemIndex();
	}

	/**
	 * Read the rating triples into compressedRatingMatrix and count the
	 * degrees of users and items.
	 *
	 * @param paraFilename
	 *            the rating filename.
	 * @return the number of ratings read.
	 */
	private int readRatings(String paraFilename) throws IOException {
		File tempFile = new File(paraFilename);
		if (!tempFile.exists()) {
			// Report the failure to the caller instead of terminating the JVM.
			throw new FileNotFoundException("File " + paraFilename + " does not exist.");
		}

		int tempIndex = 0;
		// try-with-resources closes the reader even when a line fails to parse.
		try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
			String tempString;
			while ((tempString = tempBufReader.readLine()) != null) {
				if (tempString.trim().isEmpty()) {
					continue;
				}
				if (tempIndex >= numRatings) {
					throw new IllegalArgumentException(
							"File " + paraFilename + " holds more than " + numRatings + " ratings.");
				}

				// Each line has three values: user, item, rating.
				String[] tempStrArray = tempString.split(",");
				if (tempStrArray.length < 3) {
					throw new IllegalArgumentException("Malformed rating line: " + tempString);
				}
				int tempUser = Integer.parseInt(tempStrArray[0].trim());
				int tempItem = Integer.parseInt(tempStrArray[1].trim());
				int tempRating = Integer.parseInt(tempStrArray[2].trim());
				if (tempUser < 0 || tempUser >= numUsers || tempItem < 0 || tempItem >= numItems) {
					throw new IllegalArgumentException("User or item index out of range: " + tempString);
				}

				compressedRatingMatrix[tempIndex][0] = tempUser;
				compressedRatingMatrix[tempIndex][1] = tempItem;
				compressedRatingMatrix[tempIndex][2] = tempRating;

				userDegrees[tempUser]++;
				itemDegrees[tempItem]++;
				tempIndex++;
			}
		}
		return tempIndex;
	}

	/**
	 * Compute the average rating of every user and every item.
	 */
	private void computeAverages() {
		double[] tempUserTotalScore = new double[numUsers];
		double[] tempItemTotalScore = new double[numItems];
		for (int i = 0; i < numRatings; i++) {
			tempUserTotalScore[compressedRatingMatrix[i][0]] += compressedRatingMatrix[i][2];
			tempItemTotalScore[compressedRatingMatrix[i][1]] += compressedRatingMatrix[i][2];
		}

		// A degree of 0 yields 0.0 / 0 = NaN, which never passes a radius test.
		for (int i = 0; i < numUsers; i++) {
			userAverageRatings[i] = tempUserTotalScore[i] / userDegrees[i];
		}
		for (int i = 0; i < numItems; i++) {
			itemAverageRatings[i] = tempItemTotalScore[i] / itemDegrees[i];
		}
	}

	/**
	 * Group the rating rows by item (counting sort on the item index). The
	 * result does not depend on the order of the lines in the file.
	 */
	private void buildItemIndex() {
		itemStartingIndices = new int[numItems + 1];
		for (int i = 0; i < numItems; i++) {
			itemStartingIndices[i + 1] = itemStartingIndices[i] + itemDegrees[i];
		}

		// tempNextSlot[i] is the next free position in the range of item i.
		int[] tempNextSlot = new int[numItems];
		System.arraycopy(itemStartingIndices, 0, tempNextSlot, 0, numItems);

		itemRatingIndices = new int[numRatings];
		for (int i = 0; i < numRatings; i++) {
			int tempItem = compressedRatingMatrix[i][1];
			itemRatingIndices[tempNextSlot[tempItem]] = i;
			tempNextSlot[tempItem]++;
		}
	}

	/**
	 * Set the neighbor radius.
	 *
	 * @param paraRadius
	 *            the new radius; values that are not positive are replaced by
	 *            0.1.
	 */
	public void setRadius(double paraRadius) {
		if (paraRadius > 0) {
			radius = paraRadius;
		} else {
			radius = 0.1;
		}
	}

	/**
	 * Leave-one-out prediction for every rating, user-based. The results are
	 * stored in predictions and numNonNeighbors is recounted.
	 */
	public void leaveOneOutPrediction() {
		System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);

		numNonNeighbors = 0;
		for (int i = 0; i < numRatings; i++) {
			int tempUser = compressedRatingMatrix[i][0];
			int tempItem = compressedRatingMatrix[i][1];
			int tempRating = compressedRatingMatrix[i][2];

			int tempNeighbors = 0;
			double tempTotal = 0;

			// A user with a single rating has no average left once that rating
			// is removed, hence no neighbors.
			if (userDegrees[tempUser] > 1) {
				// The user's average without the rating being predicted.
				double tempUserAverageRating = (userAverageRatings[tempUser] * userDegrees[tempUser]
						- tempRating) / (userDegrees[tempUser] - 1);

				// Visit the other users who rated the same item.
				for (int j = itemStartingIndices[tempItem]; j < itemStartingIndices[tempItem + 1]; j++) {
					int tempRow = itemRatingIndices[j];
					int tempComparedUser = compressedRatingMatrix[tempRow][0];
					if (tempUser == tempComparedUser) {
						continue;
					}

					if (Math.abs(tempUserAverageRating - userAverageRatings[tempComparedUser]) < radius) {
						tempTotal += compressedRatingMatrix[tempRow][2];
						tempNeighbors++;
					}
				}
			}

			// The prediction is the mean rating of the neighbors.
			if (tempNeighbors > 0) {
				predictions[i] = tempTotal / tempNeighbors;
			} else {
				predictions[i] = DEFAULT_RATING;
				numNonNeighbors++;
			}
		}
	}

	/**
	 * Compute the mean absolute error of the current predictions.
	 *
	 * @return the mean of |prediction - rating| over all ratings.
	 */
	public double computeMAE() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			tempTotalError += Math.abs(predictions[i] - compressedRatingMatrix[i][2]);
		}

		return tempTotalError / predictions.length;
	}

	/**
	 * Compute the root mean square error of the current predictions. The
	 * method name keeps its original spelling so that existing callers still
	 * compile.
	 *
	 * @return the square root of the mean of (prediction - rating)^2.
	 */
	public double computeRSME() throws Exception {
		double tempTotalError = 0;
		for (int i = 0; i < predictions.length; i++) {
			double tempError = predictions[i] - compressedRatingMatrix[i][2];
			tempTotalError += tempError * tempError;
		}

		double tempAverage = tempTotalError / predictions.length;

		return Math.sqrt(tempAverage);
	}

	/**
	 *************************
	 * The entrance of the program.
	 *
	 * @param args
	 *            Not used now.
	 *************************
	 */
	public static void main(String[] args) {
		try {
			// NOTE(review): the file name suggests 943 users and 1682 items;
			// the sizes passed here only need to be upper bounds, confirm
			// against the data file.
			MBR tempRecommender = new MBR("D:/data/movielens-943u1682m.txt", 10000, 1682, 1000000);

			// Radii 0.2, 0.3, 0.4, 0.5. An integer counter avoids the drift of
			// repeatedly adding 0.1 to a double.
			for (int tempStep = 2; tempStep < 6; tempStep++) {
				double tempRadius = tempStep / 10.0;
				tempRecommender.setRadius(tempRadius);

				tempRecommender.leaveOneOutPrediction();
				double tempMAE = tempRecommender.computeMAE();
				double tempRSME = tempRecommender.computeRSME();

				System.out.println("Radius = " + tempRadius + ", MAE = " + tempMAE + ", RSME = " + tempRSME
						+ ", numNonNeighbors = " + tempRecommender.numNonNeighbors);
			}
		} catch (Exception ee) {
			System.out.println(ee);
		}
	}
}

 二、实现结果

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值