package package1;
import java.util.*;
import java.util.Map.Entry;
/**
 * Evaluates user-based collaborative filtering for a single target user with two
 * similarity measures (Pearson and cosine) and prints the mean absolute error (MAE)
 * of each measure for a range of neighbourhood sizes.
 *
 * <p>Throughout this class a rating of {@code 0} means "not rated", and {@code -1000}
 * marks a value that could not be computed (for example a similarity whose
 * denominator is zero, or a prediction for an item the user has already rated).
 *
 * @author Bert Q
 */
public class Computation {

    /** Paths of the matrix files loaded by {@link #main(String[])}. */
    private static final String OTHER_MATRIX_PATH = "F:\\MyProjects\\XMGL\\resources\\other.txt";
    private static final String TARGETHADE_MATRIX_PATH = "F:\\MyProjects\\XMGL\\resources\\targethade.txt";
    private static final String TARGETFULL_MATRIX_PATH = "F:\\MyProjects\\XMGL\\resources\\targetfull.txt";

    /** Row and column counts passed to the loader for the input matrices. */
    private static final int TARGET_MATRIX_ROWS = 1;
    private static final int TARGET_MATRIX_COLS = 1682;
    private static final int OTHER_MATRIX_ROWS = 942;
    private static final int OTHER_MATRIX_COLS = 1682;

    /** Sentinel stored wherever a similarity or a prediction cannot be computed. */
    private static final int MEANINGLESS_VALUE = -1000;

    /** Selectors for the similarity measure used by {@code calcMAE}. */
    private static final int PEARSON_CALC = 1;
    private static final int COSINE_CALC = 2;

    /**
     * Loads the three rating matrices and, for every neighbourhood size k from 10 to
     * 300 in steps of 10, prints the MAE obtained with Pearson and with cosine
     * similarity together with which of the two was lower.
     *
     * @param args unused
     * @throws IllegalStateException if any of the matrices cannot be loaded
     */
    public static void main(String[] args) {
        double[][] otherMatrix;
        double[][] targetfullMatrix;
        double[][] targethadeMatrix;
        try {
            otherMatrix = KnnInputOutput.TxtToDouArray(OTHER_MATRIX_PATH, OTHER_MATRIX_ROWS, OTHER_MATRIX_COLS);
            targetfullMatrix = KnnInputOutput.TxtToDouArray(TARGETFULL_MATRIX_PATH, TARGET_MATRIX_ROWS, TARGET_MATRIX_COLS);
            targethadeMatrix = KnnInputOutput.TxtToDouArray(TARGETHADE_MATRIX_PATH, TARGET_MATRIX_ROWS, TARGET_MATRIX_COLS);
        } catch (Exception e) {
            // Nothing can be computed without the input data; fail with the real cause
            // instead of running into a NullPointerException further down.
            throw new IllegalStateException("Failed to load the rating matrices", e);
        }
        if (otherMatrix == null || targetfullMatrix == null || targethadeMatrix == null) {
            throw new IllegalStateException("The matrix loader returned null for at least one rating matrix");
        }

        for (int k = 10; k <= 300; k += 10) {
            System.out.println("k:" + k);
            // Each MAE is computed once and reused for both the printout and the comparison.
            double pearsonMae = calcMAEByPearson(targetfullMatrix, targethadeMatrix, otherMatrix, k);
            System.out.println("PEARSON:" + pearsonMae);
            double cosineMae = calcMAEByConsine(targetfullMatrix, targethadeMatrix, otherMatrix, k);
            System.out.println("Cosine:" + cosineMae);
            if (pearsonMae > cosineMae) {
                System.out.println("cosine准确!");
            } else {
                System.out.println("person准确!");
            }
            System.out.println("———————————————————————————————————————————————");
        }
    }

    /**
     * Computes the MAE of the predictions obtained with Pearson similarity.
     *
     * @param targetfullMatrix target user's complete ratings (row 0 is used)
     * @param targethadeMatrix target user's ratings with the evaluated items set to 0 (row 0 is used)
     * @param otherMatrix      ratings of the other users, one user per row
     * @param i                number of nearest neighbours used for each prediction
     * @return the mean absolute error, or {@code NaN} if no item could be evaluated
     */
    public static double calcMAEByPearson(double[][] targetfullMatrix, double[][] targethadeMatrix, double[][] otherMatrix, int i) {
        return calcMAE(targetfullMatrix, targethadeMatrix, otherMatrix, i, PEARSON_CALC);
    }

    /**
     * Computes the MAE of the predictions obtained with cosine similarity.
     *
     * @param targetfullMatrix target user's complete ratings (row 0 is used)
     * @param targethadeMatrix target user's ratings with the evaluated items set to 0 (row 0 is used)
     * @param otherMatrix      ratings of the other users, one user per row
     * @param i                number of nearest neighbours used for each prediction
     * @return the mean absolute error, or {@code NaN} if no item could be evaluated
     */
    public static double calcMAEByConsine(double[][] targetfullMatrix, double[][] targethadeMatrix, double[][] otherMatrix, int i) {
        return calcMAE(targetfullMatrix, targethadeMatrix, otherMatrix, i, COSINE_CALC);
    }

    /**
     * Predicts the ratings missing from {@code targethadeMatrix} and compares them
     * with the values in {@code targetfullMatrix}.
     *
     * <p>An item contributes to the error only if its two input values differ, its
     * value in {@code targethadeMatrix} is 0, and a prediction could be computed.
     *
     * @param calcMethod {@code PEARSON_CALC} or {@code COSINE_CALC}
     * @return the mean absolute error, or {@code NaN} if no item contributed
     * @throws IllegalArgumentException if {@code calcMethod} is not a known selector
     */
    private static double calcMAE(double[][] targetfullMatrix, double[][] targethadeMatrix, double[][] otherMatrix, int neighbourNum, int calcMethod) {
        double[][] similarity;
        switch (calcMethod) {
            case PEARSON_CALC:
                similarity = pearsonMatrixCalc(targethadeMatrix, otherMatrix);
                break;
            case COSINE_CALC:
                similarity = cosineMatrixCalc(targethadeMatrix, otherMatrix);
                break;
            default:
                throw new IllegalArgumentException("Unknown similarity method: " + calcMethod);
        }

        int[][] neighbours = getNeighbourMatrixBySortedList(sortMatrix(similarity));
        double[][] predicted = getPreRatingsMatrix(targethadeMatrix, otherMatrix, similarity, neighbours, neighbourNum);

        double absoluteErrorSum = 0d;
        int evaluatedCount = 0;
        // Only row 0 is read: the prediction matrix has a single row for the single target user.
        for (int j = 0; j < targetfullMatrix[0].length; j++) {
            double actual = targetfullMatrix[0][j];
            double masked = targethadeMatrix[0][j];
            if (actual == masked) {
                continue;
            }
            if (masked == 0 && predicted[0][j] != MEANINGLESS_VALUE) {
                absoluteErrorSum += Math.abs(actual - predicted[0][j]);
                evaluatedCount++;
            }
        }
        return evaluatedCount == 0 ? Double.NaN : absoluteErrorSum / evaluatedCount;
    }

    /**
     * Builds the prediction matrix of the target user.
     *
     * <p>Items the target user has not rated (value 0) receive a predicted rating;
     * items that already have a rating receive the sentinel {@code -1000}. The result
     * has one row and as many columns as row 0 of {@code targetMatrix}.
     *
     * @param targetMatrix target user's ratings (row 0 is used)
     * @param otherMatrix  ratings of the other users, one user per row
     * @param simMatrix    similarities between the target user and each other user (row 0 is used)
     * @param neighbour    indices of the other users ordered by descending similarity (row 0 is used)
     * @param neighbourNum number of leading neighbours used for each prediction
     * @return the 1 x n matrix of predictions
     * @see #calcPredictionScore(int, double[][], double[][], double[][], int[][], int)
     */
    public static double[][] getPreRatingsMatrix(double[][] targetMatrix, double[][] otherMatrix, double[][] simMatrix, int[][] neighbour, int neighbourNum) {
        int itemCount = targetMatrix[0].length;
        double[][] preRatings = new double[1][itemCount];
        // The averages do not depend on the item, so they are computed once up front.
        double targetAverage = avgInMatrixRowWithout0(targetMatrix, 1);
        double[] otherAverages = rowAveragesWithout0(otherMatrix);
        for (int i = 0; i < itemCount; i++) {
            if (targetMatrix[0][i] == 0) {
                preRatings[0][i] = predictScore(i, targetAverage, otherMatrix, otherAverages, simMatrix, neighbour, neighbourNum);
            } else {
                preRatings[0][i] = MEANINGLESS_VALUE;
            }
        }
        return preRatings;
    }

    /**
     * Predicts the target user's rating for one item.
     *
     * <p>The prediction is the target user's average rating plus the
     * similarity-weighted average of the neighbours' deviations from their own
     * average rating. Neighbours whose similarity is the sentinel {@code -1000} or
     * who have not rated the item are skipped.
     *
     * @param i            index of the item to predict
     * @param targetMatrix target user's ratings (row 0 is used)
     * @param otherMatrix  ratings of the other users, one user per row
     * @param simMatrix    similarities between the target user and each other user (row 0 is used)
     * @param neighbour    indices of the other users ordered by descending similarity (row 0 is used)
     * @param neighbourNum number of leading neighbours to use; values larger than the
     *                     number of available neighbours use all of them
     * @return the predicted rating, or {@code -1000} if no neighbour contributed
     */
    public static double calcPredictionScore(int i, double[][] targetMatrix, double[][] otherMatrix, double[][] simMatrix, int[][] neighbour, int neighbourNum) {
        return predictScore(i, avgInMatrixRowWithout0(targetMatrix, 1), otherMatrix,
                rowAveragesWithout0(otherMatrix), simMatrix, neighbour, neighbourNum);
    }

    /** Prediction formula shared by the public entry points; averages are supplied by the caller. */
    private static double predictScore(int item, double targetAverage, double[][] otherMatrix, double[] otherAverages, double[][] simMatrix, int[][] neighbour, int neighbourNum) {
        int usableNeighbours = Math.min(neighbourNum, neighbour[0].length);
        double weightedDeviationSum = 0d;
        double weightSum = 0d;
        for (int j = 0; j < usableNeighbours; j++) {
            int current = neighbour[0][j];
            double sim = simMatrix[0][current];
            double rating = otherMatrix[current][item];
            if (sim == MEANINGLESS_VALUE || rating == 0) {
                continue;
            }
            weightedDeviationSum += sim * (rating - otherAverages[current]);
            weightSum += Math.abs(sim);
        }
        // A zero weight sum means no neighbour contributed, so there is nothing to predict from.
        return weightSum == 0 ? MEANINGLESS_VALUE : targetAverage + (weightedDeviationSum / weightSum);
    }

    /** Returns the non-zero average of every row of {@code matrix}, indexed by 0-based row. */
    private static double[] rowAveragesWithout0(double[][] matrix) {
        double[] averages = new double[matrix.length];
        for (int row = 0; row < matrix.length; row++) {
            averages[row] = avgInMatrixRowWithout0(matrix, row + 1);
        }
        return averages;
    }

    /**
     * Averages the non-zero values of one matrix row.
     *
     * @param matrix matrix to read
     * @param row    1-based number of the row to average
     * @return the average of the non-zero values, or {@code NaN} if the row has none
     */
    public static double avgInMatrixRowWithout0(double[][] matrix, int row) {
        return avgInMatrixRowWithoutMeaningLessNum(matrix, row, 0);
    }

    /**
     * Pairs every value of row 0 with its column index and sorts the pairs by value
     * in descending order. Equal values keep their ascending index order.
     *
     * @param matrix matrix whose row 0 is sorted
     * @return list of (column index, value) entries, largest value first
     */
    public static List<Map.Entry<Integer, Double>> sortMatrix(double[][] matrix) {
        double[] values = matrix[0];
        List<Map.Entry<Integer, Double>> sortedList = new ArrayList<Map.Entry<Integer, Double>>(values.length);
        // Entries are created in index order so that the stable sort breaks ties deterministically.
        for (int i = 0; i < values.length; i++) {
            sortedList.add(new AbstractMap.SimpleEntry<Integer, Double>(i, values[i]));
        }
        Collections.sort(sortedList, new Comparator<Map.Entry<Integer, Double>>() {
            @Override
            public int compare(Entry<Integer, Double> o1, Entry<Integer, Double> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        return sortedList;
    }

    /**
     * Extracts the keys (column indices) of a sorted entry list.
     *
     * @param sortedList entries as returned by {@link #sortMatrix(double[][])}
     * @return a 1 x n matrix holding the keys in list order
     */
    public static int[][] getNeighbourMatrixBySortedList(List<Map.Entry<Integer, Double>> sortedList) {
        int[][] neighbourMatrix = new int[1][sortedList.size()];
        int position = 0;
        for (Entry<Integer, Double> entry : sortedList) {
            neighbourMatrix[0][position++] = entry.getKey();
        }
        return neighbourMatrix;
    }

    /**
     * Extracts the values of a sorted entry list.
     *
     * @param sortedList entries as returned by {@link #sortMatrix(double[][])}
     * @return a 1 x n matrix holding the values in list order
     */
    public static double[][] getSortedMatrixBySortedList(List<Map.Entry<Integer, Double>> sortedList) {
        double[][] sortedMatrix = new double[1][sortedList.size()];
        int position = 0;
        for (Entry<Integer, Double> entry : sortedList) {
            sortedMatrix[0][position++] = entry.getValue();
        }
        return sortedMatrix;
    }

    /**
     * Computes the Pearson similarity between the target user and every other user.
     *
     * <p>Only items rated by both users (non-zero in both rows) enter the sums, while
     * each user's mean is taken over all of that user's non-zero ratings. If either
     * variance term is zero the similarity is set to the sentinel {@code -1000}.
     *
     * @param targetMatrix target user's ratings (row 0 is used)
     * @param otherMatrix  ratings of the other users, one user per row
     * @return a 1 x m matrix with one similarity per row of {@code otherMatrix}
     */
    public static double[][] pearsonMatrixCalc(double[][] targetMatrix, double[][] otherMatrix) {
        double[][] ret = new double[1][otherMatrix.length];
        // The target user's mean is the same for every comparison.
        double targetAverage = avgInMatrixRowWithout0(targetMatrix, 1);
        for (int i = 0; i < otherMatrix.length; i++) {
            double otherAverage = avgInMatrixRowWithout0(otherMatrix, i + 1);
            double covariance = 0;
            double targetVariance = 0;
            double otherVariance = 0;
            for (int j = 0; j < otherMatrix[i].length; j++) {
                if (targetMatrix[0][j] == 0 || otherMatrix[i][j] == 0) {
                    continue; // skip items not rated by both users
                }
                double targetDeviation = targetMatrix[0][j] - targetAverage;
                double otherDeviation = otherMatrix[i][j] - otherAverage;
                covariance += targetDeviation * otherDeviation;
                targetVariance += targetDeviation * targetDeviation;
                otherVariance += otherDeviation * otherDeviation;
            }
            if (targetVariance == 0 || otherVariance == 0) {
                ret[0][i] = MEANINGLESS_VALUE;
            } else {
                ret[0][i] = covariance / (Math.sqrt(targetVariance) * Math.sqrt(otherVariance));
            }
        }
        return ret;
    }

    /**
     * Computes the cosine similarity between the target user and every other user.
     *
     * <p>Only items rated by both users (non-zero in both rows) enter the sums. If
     * either norm is zero the similarity is set to the sentinel {@code -1000}.
     *
     * @param targetMatrix target user's ratings (row 0 is used)
     * @param otherMatrix  ratings of the other users, one user per row
     * @return a 1 x m matrix with one similarity per row of {@code otherMatrix}
     */
    public static double[][] cosineMatrixCalc(double[][] targetMatrix, double[][] otherMatrix) {
        double[][] ret = new double[1][otherMatrix.length];
        for (int i = 0; i < otherMatrix.length; i++) {
            double dotProduct = 0;
            double otherSquares = 0;
            double targetSquares = 0;
            for (int j = 0; j < otherMatrix[i].length; j++) {
                if (otherMatrix[i][j] != 0 && targetMatrix[0][j] != 0) {
                    dotProduct += otherMatrix[i][j] * targetMatrix[0][j];
                    otherSquares += otherMatrix[i][j] * otherMatrix[i][j];
                    targetSquares += targetMatrix[0][j] * targetMatrix[0][j];
                }
            }
            if (otherSquares != 0 && targetSquares != 0) {
                ret[0][i] = dotProduct / (Math.sqrt(otherSquares) * Math.sqrt(targetSquares));
            } else {
                ret[0][i] = MEANINGLESS_VALUE;
            }
        }
        return ret;
    }

    /**
     * Averages one matrix row while ignoring every value equal to a given number.
     *
     * @param matrix         matrix to read
     * @param row            1-based number of the row to average
     * @param meaningLessNum value to leave out of the average
     * @return the average of the remaining values, or {@code NaN} if none remain
     */
    public static double avgInMatrixRowWithoutMeaningLessNum(double[][] matrix, int row, int meaningLessNum) {
        double sum = 0;
        int divideCount = 0;
        for (double value : matrix[row - 1]) {
            if (value == meaningLessNum) {
                continue;
            }
            sum += value;
            divideCount++;
        }
        return sum / divideCount;
    }

    /**
     * Prints every row of a two-dimensional array on its own line; does nothing for {@code null}.
     *
     * @param arr array to print
     */
    public static void printArrays(double[][] arr) {
        if (arr == null) {
            return;
        }
        for (double[] row : arr) {
            System.out.println(Arrays.toString(row));
        }
    }
}
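// Recommendation algorithm: implementation and comparison of the results of Pearson similarity and cosine similarity.
// (Web page residue: "latest recommended article published on 2024-08-14 18:40:11".)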