一、学习内容
昨天实现的是 item-based recommendation. 今天自己来实现一下 user-based recommendation. 只需要在原有基础上增加即可.
package machinelearning.knn;
/**
* Recommendation with M-distance.
* @author Fan Min minfanphd@163.com.
*/
import java.io.*;
public class MBR {
    /**
     * Rating predicted when no neighbor is available. Ratings are expected to
     * lie in the range 1-5, so 3.0 is the neutral midpoint.
     */
    public static final double DEFAULT_RATING = 3.0;

    /** Number of users (valid user ids are 0 .. numUsers - 1). */
    private final int numUsers;

    /** Number of items (valid item ids are 0 .. numItems - 1). */
    private final int numItems;

    /** Number of ratings actually loaded from the file. */
    private final int numRatings;

    /** Leave-one-out prediction for every rating, parallel to compressedRatingMatrix. */
    private final double[] predictions;

    /** Compressed rating matrix: one {user, item, rating} triple per row, in file order. */
    private final int[][] compressedRatingMatrix;

    /** Number of ratings given by each user. */
    private final int[] userDegrees;

    /** Average rating given by each user. */
    private final double[] userAverageRatings;

    /** Number of ratings received by each item. */
    private final int[] itemDegrees;

    /** Average rating received by each item. */
    private final double[] itemAverageRatings;

    /**
     * Prefix sums of userDegrees: user u owns rows userStartingIndices[u] ..
     * userStartingIndices[u + 1] - 1 of compressedRatingMatrix. This is only
     * meaningful when the file lists ratings grouped by ascending user id.
     * It is kept for the item-based variant and is not read by the user-based
     * prediction below.
     */
    private final int[] userStartingIndices;

    /**
     * Prefix sums of itemDegrees: item i owns positions itemStartingIndices[i] ..
     * itemStartingIndices[i + 1] - 1 of ratingIndicesByItem.
     */
    private final int[] itemStartingIndices;

    /**
     * Row indices into compressedRatingMatrix, grouped by item. This is what
     * lets the user-based prediction find every other user who rated an item
     * without scanning the whole matrix.
     */
    private final int[] ratingIndicesByItem;

    /** Number of ratings for which no neighbor was found in the last prediction run. */
    private int numNonNeighbors;

    /** Radius (delta) that decides whether two users are neighbors. */
    private double radius;

    /**
     *************************
     * Load the ratings and build the per-user and per-item statistics.
     *
     * Each non-blank line of the file must be "user,item,rating" with integer
     * values. Fewer lines than paraNumRatings is accepted (only the lines
     * actually read are used); more lines is an error.
     *
     * @param paraFilename
     *            the rating filename.
     * @param paraNumUsers
     *            number of users
     * @param paraNumItems
     *            number of items
     * @param paraNumRatings
     *            maximal number of ratings in the file
     * @throws FileNotFoundException
     *             if the file does not exist.
     * @throws IllegalArgumentException
     *             if a size is negative, a line is malformed, an id is out of
     *             range, or the file holds more ratings than declared.
     *************************
     */
    public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
        if (paraNumUsers < 0 || paraNumItems < 0 || paraNumRatings < 0) {
            throw new IllegalArgumentException("Sizes must be non-negative: users = " + paraNumUsers
                    + ", items = " + paraNumItems + ", ratings = " + paraNumRatings);
        }

        numUsers = paraNumUsers;
        numItems = paraNumItems;
        userDegrees = new int[numUsers];
        itemDegrees = new int[numItems];

        System.out.println("Reading " + paraFilename);
        File tempFile = new File(paraFilename);
        if (!tempFile.exists()) {
            // Let the caller decide what to do instead of killing the JVM.
            throw new FileNotFoundException("File " + paraFilename + " does not exist.");
        }

        // Read into a buffer of the declared capacity, then keep only what was read.
        int[][] tempTriples = new int[paraNumRatings][];
        int tempCount = 0;
        try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
            String tempLine;
            int tempLineNumber = 0;
            while ((tempLine = tempBufReader.readLine()) != null) {
                tempLineNumber++;
                if (tempLine.trim().isEmpty()) {
                    continue;
                }
                if (tempCount >= paraNumRatings) {
                    throw new IllegalArgumentException("File " + paraFilename + " contains more than the declared "
                            + paraNumRatings + " ratings.");
                }
                int[] tempTriple = parseRating(tempLine, tempLineNumber);
                tempTriples[tempCount] = tempTriple;
                userDegrees[tempTriple[0]]++;
                itemDegrees[tempTriple[1]]++;
                tempCount++;
            }
        }

        numRatings = tempCount;
        compressedRatingMatrix = new int[numRatings][];
        System.arraycopy(tempTriples, 0, compressedRatingMatrix, 0, numRatings);
        predictions = new double[numRatings];

        // Average ratings of users and items.
        double[] tempUserTotalScore = new double[numUsers];
        double[] tempItemTotalScore = new double[numItems];
        for (int i = 0; i < numRatings; i++) {
            tempUserTotalScore[compressedRatingMatrix[i][0]] += compressedRatingMatrix[i][2];
            tempItemTotalScore[compressedRatingMatrix[i][1]] += compressedRatingMatrix[i][2];
        }
        userAverageRatings = new double[numUsers];
        for (int i = 0; i < numUsers; i++) {
            // A user without ratings gets the neutral default instead of NaN.
            userAverageRatings[i] = userDegrees[i] > 0 ? tempUserTotalScore[i] / userDegrees[i] : DEFAULT_RATING;
        }
        itemAverageRatings = new double[numItems];
        for (int i = 0; i < numItems; i++) {
            itemAverageRatings[i] = itemDegrees[i] > 0 ? tempItemTotalScore[i] / itemDegrees[i] : DEFAULT_RATING;
        }

        // Starting indices as prefix sums, so users/items without ratings get empty ranges.
        userStartingIndices = new int[numUsers + 1];
        for (int i = 0; i < numUsers; i++) {
            userStartingIndices[i + 1] = userStartingIndices[i] + userDegrees[i];
        }
        itemStartingIndices = new int[numItems + 1];
        for (int i = 0; i < numItems; i++) {
            itemStartingIndices[i + 1] = itemStartingIndices[i] + itemDegrees[i];
        }

        // Counting sort of the rating rows by item.
        ratingIndicesByItem = new int[numRatings];
        int[] tempNextSlot = new int[numItems];
        System.arraycopy(itemStartingIndices, 0, tempNextSlot, 0, numItems);
        for (int i = 0; i < numRatings; i++) {
            int tempItem = compressedRatingMatrix[i][1];
            ratingIndicesByItem[tempNextSlot[tempItem]++] = i;
        }
    }

    /**
     * Parse one "user,item,rating" line and check the ids against the declared sizes.
     *
     * @param paraLine
     *            the text of the line.
     * @param paraLineNumber
     *            1-based line number, used in error messages only.
     * @return the {user, item, rating} triple.
     */
    private int[] parseRating(String paraLine, int paraLineNumber) {
        String[] tempFields = paraLine.split(",");
        if (tempFields.length < 3) {
            throw new IllegalArgumentException(
                    "Line " + paraLineNumber + " is not a user,item,rating triple: " + paraLine);
        }
        int tempUser = Integer.parseInt(tempFields[0].trim());
        int tempItem = Integer.parseInt(tempFields[1].trim());
        int tempRating = Integer.parseInt(tempFields[2].trim());
        if (tempUser < 0 || tempUser >= numUsers) {
            throw new IllegalArgumentException("Line " + paraLineNumber + ": user " + tempUser
                    + " is outside [0, " + numUsers + ").");
        }
        if (tempItem < 0 || tempItem >= numItems) {
            throw new IllegalArgumentException("Line " + paraLineNumber + ": item " + tempItem
                    + " is outside [0, " + numItems + ").");
        }
        return new int[] { tempUser, tempItem, tempRating };
    }

    /**
     * Set the neighborhood radius. Non-positive values fall back to 0.1.
     *
     * @param paraRadius
     *            the requested radius.
     */
    public void setRadius(double paraRadius) {
        if (paraRadius > 0) {
            radius = paraRadius;
        } else {
            radius = 0.1;
        }
    }

    /**
     * User-based leave-one-out prediction.
     *
     * For every rating (u, i) the rating itself is removed from u's average.
     * The neighbors are the other users who rated item i and whose average
     * rating differs from that adjusted average by less than the radius. The
     * prediction is the mean of the neighbors' ratings of item i, or
     * DEFAULT_RATING when there is no neighbor.
     */
    public void leaveOneOutPrediction() {
        System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);

        numNonNeighbors = 0;
        for (int i = 0; i < numRatings; i++) {
            int tempUser = compressedRatingMatrix[i][0];
            int tempItem = compressedRatingMatrix[i][1];
            int tempRating = compressedRatingMatrix[i][2];

            int tempNeighbors = 0;
            double tempTotal = 0;

            // With a single rating nothing is left to characterize the user once it is removed.
            if (userDegrees[tempUser] > 1) {
                // Average of the current user without the rating being predicted.
                double tempUserAverageRating = (userAverageRatings[tempUser] * userDegrees[tempUser] - tempRating)
                        / (userDegrees[tempUser] - 1);

                // Scan every rating of the same item; these come from the candidate neighbor users.
                for (int k = itemStartingIndices[tempItem]; k < itemStartingIndices[tempItem + 1]; k++) {
                    int tempRow = ratingIndicesByItem[k];
                    int tempComparedUser = compressedRatingMatrix[tempRow][0];
                    if (tempUser == tempComparedUser) {
                        continue;
                    }

                    if (Math.abs(tempUserAverageRating - userAverageRatings[tempComparedUser]) < radius) {
                        tempTotal += compressedRatingMatrix[tempRow][2];
                        tempNeighbors++;
                    }
                }
            }

            // The prediction is the neighbors' mean rating of this item.
            if (tempNeighbors > 0) {
                predictions[i] = tempTotal / tempNeighbors;
            } else {
                predictions[i] = DEFAULT_RATING;
                numNonNeighbors++;
            }
        }
    }

    /**
     * Mean absolute error of the current predictions.
     *
     * @return the MAE; NaN when no rating was loaded.
     */
    public double computeMAE() throws Exception {
        double tempTotalError = 0;
        for (int i = 0; i < predictions.length; i++) {
            tempTotalError += Math.abs(predictions[i] - compressedRatingMatrix[i][2]);
        }

        return tempTotalError / predictions.length;
    }

    /**
     * Root mean squared error of the current predictions.
     *
     * @return the RMSE; NaN when no rating was loaded.
     */
    public double computeRSME() throws Exception {
        double tempTotalError = 0;
        for (int i = 0; i < predictions.length; i++) {
            double tempError = predictions[i] - compressedRatingMatrix[i][2];
            tempTotalError += tempError * tempError;
        }

        double tempAverage = tempTotalError / predictions.length;
        return Math.sqrt(tempAverage);
    }

    /**
     *************************
     * The entrance of the program.
     *
     * @param args
     *            Not used now.
     *************************
     */
    public static void main(String[] args) {
        try {
            // Sizes match the data set named in the file: 943 users, 1682 movies
            // (100000 ratings in MovieLens 100K).
            MBR tempRecommender = new MBR("D:/data/movielens-943u1682m.txt", 943, 1682, 100000);

            // Integer steps avoid the drift of repeatedly adding 0.1 to a double.
            for (int tempStep = 2; tempStep < 6; tempStep++) {
                double tempRadius = tempStep / 10.0;
                tempRecommender.setRadius(tempRadius);

                tempRecommender.leaveOneOutPrediction();
                double tempMAE = tempRecommender.computeMAE();
                double tempRSME = tempRecommender.computeRSME();

                System.out.println("Radius = " + tempRadius + ", MAE = " + tempMAE + ", RSME = " + tempRSME
                        + ", numNonNeighbors = " + tempRecommender.numNonNeighbors);
            }
        } catch (Exception ee) {
            System.out.println(ee);
        }
    }
}
二、实现结果