矩阵分解
1、BufferedReader 读取文件中数据到缓冲区
BufferedReader tempBufferReader = new BufferedReader(new FileReader(tempFile));
Buffer:表示缓冲区。缓冲区中的内容可以更改,提高访问效率。
如果想按行读取任意长度的文本数据,可以使用BufferedReader。是否产生乱码取决于底层Reader所使用的字符编码(FileReader默认使用平台编码),必要时应显式指定编码。
通过readLine() 逐行访问缓冲区数据;读到文件末尾时readLine() 返回null,使用前应先判空。读完后(包括出现异常时)都要关闭,tempBufferReader.close()。
for (int i = 0; i < paraNumRatings; i++) {
tempString = tempBufferReader.readLine();
tempStringArray = tempString.split(",");
dataset[i] = new Triple(Integer.parseInt(tempStringArray[0]),
Integer.parseInt(tempStringArray[1]),Integer.parseInt(tempStringArray[2]));
}// Of for i
2、代码
package machinelearning.recommendersystem;
import java.io.*;
import java.util.Random;
/*
* Matrix factorization for recommender systems.
*/
public class MatrixFactorization {
/**
* Used to generate random numbers.
*/
Random rand = new Random();
/**
* Number of users.
*/
int numUsers;
/**
* Number of items.
*/
int numItems;
/**
* Number of ratings.
*/
int numRatings;
/**
* Train data;
*/
Triple[] dataset;
/**
* A parameter for controlling learning regular.
*/
double alpha;
/**
* A parameter for controlling learning speed.
*/
double lambda;
/**
* The low rank of the small matrices.
*/
int rank;
/**
* The user matrix U.
*/
double[][] userSubspace;
/**
* The item matrix V.
*/
double[][] itemSubspace;
/**
* The lower bound of the rating value.
*/
double ratingLowerBound;
/**
* The upper bound of the rating value.
*/
double ratingUpperBound;
/**
* *********************
* First constructor.
*
* @param paraFilename.
* The data filename.
* @param paraNumUsers.
* The number of users.
* @param paraNumItem.
* The number of items.
* @param paraNumRatings
* The number of ratings.
* @param paraRatingLowerBound
* The lower bound of the rating value.
* @param paraRatingUpperBound
* The upper bound of the rating value.
* ********************
*/
public MatrixFactorization(String paraFilename, int paraNumUsers, int paraNumItem,
int paraNumRatings, double paraRatingLowerBound, double paraRatingUpperBound) {
numUsers = paraNumUsers;
numItems = paraNumItem;
numRatings = paraNumRatings;
ratingLowerBound = paraRatingLowerBound;
ratingUpperBound = paraRatingUpperBound;
try {
readData(paraFilename, paraNumUsers, paraNumItem, paraNumRatings);
} catch (Exception ee) {
System.out.println("File " + paraFilename + " cannot be read! " + ee);
System.exit(0);
}// Of try
}// Of the first constructor
/**
* ***************************
* Set parameters.
*
* @param paraRank
* The given rank.
* @throws IOException.
* ***************************
*/
public void setParameters(int paraRank, double paraAlpha, double paraLambda) {
rank = paraRank;
alpha = paraAlpha;
lambda = paraLambda;
}// Of setParameters
/**
* ****************************
* Read the data from the file.
*
* @param paraFilename.
* The given file.
* @throws IOException.
* ****************************
*/
public void readData(String paraFilename, int paraNumUsers, int paraNumItem,
int paraNumRatings) throws IOException {
File tempFile = new File(paraFilename);
if (!tempFile.exists()) {
System.out.println("File " + paraFilename + " does not exists.");
System.exit(0);
}// Of if
BufferedReader tempBufferReader = new BufferedReader(new FileReader(tempFile));
// Allocate space.
dataset = new Triple[paraNumRatings];
String tempString;
String[] tempStringArray;
for (int i = 0; i < paraNumRatings; i++) {
tempString = tempBufferReader.readLine();
tempStringArray = tempString.split(",");
dataset[i] = new Triple(Integer.parseInt(tempStringArray[0]),
Integer.parseInt(tempStringArray[1]),Integer.parseInt(tempStringArray[2]));
}// Of for i
tempBufferReader.close();
}// Of readData
/**
* ***************************
* Initialize subspace. Each value is in [0, 1].
* ***************************
*/
void initializeSubspace() {
userSubspace = new double[numUsers][rank];
for(int i = 0; i < numUsers; i++) {
for(int j = 0; j < rank; j++) {
userSubspace[i][j] = rand.nextDouble();
}// Of for i
}// Of for j
itemSubspace = new double[numItems][rank];
for(int i = 0; i < numItems; i++) {
for(int j = 0; j < rank; j++) {
itemSubspace[i][j] = rand.nextDouble();
}// Of for i
}// Of for j
}// Of initializeSubspace
/**
* ***********************
* Predict the rating of the user to the item
*
* @param paraUser
* The user index.
* ***********************
*/
public double predict(int paraUser, int paraItem) {
double resultValue = 0;
for (int i = 0; i < rank; i++) {
resultValue += userSubspace[paraUser][i] * itemSubspace[paraItem][i];
}// Of for i
return resultValue;
}// Of predict
/**
* **********************
* Train.
*
* @param paraRounds
* The number of rounds.
* **********************
*/
public void train(int paraRounds) {
initializeSubspace();
for (int i = 0; i < paraRounds; i++) {
updateNoRegular();
if (i % 50 == 0) {
System.out.println("Round" + i);
System.out.println("MAE:" + mae());
}// Of if
}// Of for i
}// Of train
/**
* **************************
* Update sub-spaces using the train data.
* **************************
*/
public void updateNoRegular() {
for(int i = 0; i < numRatings; i++) {
int tempUserId = dataset[i].user;
int tempItemId = dataset[i].item;
double tempRate = dataset[i].rating;
double tempResidual = tempRate - predict(tempUserId,tempItemId);
double tempValue = 0;
for (int j = 0; j < rank; j++) {
tempValue = 2 * tempResidual * itemSubspace[tempItemId][j];
userSubspace[tempUserId][j] += alpha * tempValue;
}// Of for j
for (int j = 0; j < rank; j++) {
tempValue = 2 * tempResidual * userSubspace[tempUserId][j];
itemSubspace[tempItemId][j] += alpha * tempValue;
}// Of for j
}// Of for i
}// Of updateNoRegular
/**
* **************************
* Update sub-spaces using the train data.
* **************************
*/
public void updateRegular() {
for(int i = 0; i < numRatings; i++) {
int tempUserId = dataset[i].user;
int tempItemId = dataset[i].item;
double tempRate = dataset[i].rating;
double tempResidual = tempRate - predict(tempUserId,tempItemId);
double tempValue = 0;
for (int j = 0; j < rank; j++) {
tempValue = 2 * tempResidual * itemSubspace[tempItemId][j] - lambda * userSubspace[tempUserId][j];
userSubspace[tempUserId][j] += alpha * tempValue;
}// Of for j
for (int j = 0; j < rank; j++) {
tempValue = 2 * tempResidual * userSubspace[tempUserId][j] - lambda * itemSubspace[tempItemId][j];
itemSubspace[tempItemId][j] += alpha * tempValue;
}// Of for j
}// Of for i
}// Of updateRegular
/**
* **************************
* Compute the RSME.
*
* @return RMSE of the current factorization.
* **************************
*/
public double rsme() {
double resultRsme = 0;
int tempTestCount = 0;
for (int i = 0; i < numRatings; i++) {
int tempUserId = dataset[i].user;
int tempItemId = dataset[i].item;
double tempRate = dataset[i].rating;
double tempPrediction = predict(tempUserId,tempItemId);
if(tempPrediction < ratingLowerBound) {
tempPrediction = ratingLowerBound;
} else if (tempPrediction > ratingUpperBound) {
tempPrediction = ratingUpperBound;
}// Of if
double tempError = tempRate - tempPrediction;
resultRsme += tempError * tempError;
tempTestCount++;
}// Of for i
return Math.sqrt(resultRsme / tempTestCount);
}// Of rsme
/**
* *************************
* Compute the MAE.
*
* @return MAE of the current factorization.
* *************************
*/
public double mae() {
double resultMae = 0;
double resultCount = 0;
for (int i = 0; i < numRatings; i++) {
int tempUserId = dataset[i].user;
int tempItemId = dataset[i].item;
double tempRate = dataset[i].rating;
double tempPrediction = predict(tempUserId,tempItemId);
if(tempPrediction < ratingLowerBound) {
tempPrediction = ratingLowerBound;
} else if (tempPrediction > ratingUpperBound) {
tempPrediction = ratingUpperBound;
}// Of if
double tempError = tempRate - tempPrediction;
resultMae += Math.abs(tempError);
resultCount++;
}// Of for i
return (resultMae / resultCount);
}// Of mae
public static void testTrainingTesting(String paraFilename, int paraNumUsers, int paraNumItems,
int paraNumRatings, double paraRatingLowerBound, double paraRatingUpperBound,
int paraRounds) {
try {
//Step 1. reading the training and testing data
MatrixFactorization tempMF = new MatrixFactorization(paraFilename, paraNumUsers, paraNumItems,
paraNumRatings, paraRatingLowerBound, paraRatingUpperBound);
tempMF.setParameters(5, 0.0001, 0.005);
//Step 2. update and predict
System.out.println("Begin training! ! !");
tempMF.train(paraRounds);
//Step 1. compute the mse
double tempMAE = tempMF.mae();
double tempRSME = tempMF.rsme();
System.out.println("Finally, MAE = " + tempMAE + ", RSME = " + tempRSME);
} catch (Exception e) {
e.printStackTrace();
}// Of try
}// Of testTrainingTesting
/**
* ******************
* The entrance of the program.
*
* @param args Not used now.
* ******************
*/
public static void main(String args[]) {
testTrainingTesting("E:\\Java\\Java_sampledata\\movielens-943u1682m.txt",943, 1682, 10000, 1, 5, 2000);
}
public class Triple {
public int user;
public int item;
public double rating;
/**
* ******************
* The constructor.
* ******************
*/
public Triple() {
user = -1;
item = -1;
rating = -1;
}// Of the first constructor
/**
* ********************
* The constructor.
* ********************
*/
public Triple(int paraUser, int paraItem, double paraRating) {
user = paraUser;
item = paraItem;
rating = paraRating;
}// Of the first constructor
/**
* ********************
* Show me.
* ********************
*/
public String toString() {
return "" + user + ", " + item + ", " + rating;
}// Of toString
}// Of class Triple
}// Of class MatrixFactorization