推荐算法-pearson相似性计算以及cosine相似性计算结果的实现以及比较代码

2 篇文章 0 订阅
2 篇文章 0 订阅
package package1;

import java.util.*;

import java.util.Map.Entry;

/**
 * @author Bert Q
 * ClassName : Computation
 * Description
 */
public class Computation {

    /**
     * Paths of the text files holding the rating matrices loaded by {@link #main}.
     */
    private static final String OTHER_MATRIX_PATH = "F:\\MyProjects\\XMGL\\resources\\other.txt";
    private static final String TARGETHADE_MATRIX_PATH = "F:\\MyProjects\\XMGL\\resources\\targethade.txt";
    private static final String TARGETFULL_MATRIX_PATH = "F:\\MyProjects\\XMGL\\resources\\targetfull.txt";

    /**
     * Row/column counts passed to the loader for the target-user matrices and
     * for the matrix of all other users.
     */
    private static final int TARGET_MATRIX_ROWS = 1;
    private static final int TARGET_MATRIX_COLS = 1682;
    private static final int OTHER_MATRIX_ROWS = 942;
    private static final int OTHER_MATRIX_COLS = 1682;

    /**
     * Sentinel stored wherever a similarity or a predicted rating cannot be computed.
     */
    private static final int MEANINGLESS_VALUE = -1000;

    /** Selector values understood by {@link #calcMAE}. */
    private static final int PERASON_CALC = 1;
    private static final int CONSINE_CALC = 2;

    /**
     * Loads the three rating matrices and, for every neighbourhood size k in
     * 10, 20, ..., 300, prints the MAE obtained with Pearson and with cosine
     * similarity together with which of the two was lower.
     *
     * @param args unused
     * @throws IllegalStateException if any matrix cannot be loaded
     */
    public static void main(String[] args) {
        double[][] otherMatrix;
        double[][] targetfullMatrix;
        double[][] targethadeMatrix;
        try {
            otherMatrix = KnnInputOutput.TxtToDouArray(OTHER_MATRIX_PATH, OTHER_MATRIX_ROWS, OTHER_MATRIX_COLS);
            targetfullMatrix = KnnInputOutput.TxtToDouArray(TARGETFULL_MATRIX_PATH, TARGET_MATRIX_ROWS, TARGET_MATRIX_COLS);
            targethadeMatrix = KnnInputOutput.TxtToDouArray(TARGETHADE_MATRIX_PATH, TARGET_MATRIX_ROWS, TARGET_MATRIX_COLS);
        } catch (Exception e) {
            // Fail immediately with the real cause instead of continuing with
            // null matrices and crashing later with an unrelated NullPointerException.
            throw new IllegalStateException("Failed to load rating matrices", e);
        }

        for (int k = 10; k <= 300; k += 10) {
            System.out.println("k:" + k);
            // Each MAE is expensive to compute; evaluate it once and reuse the
            // value for both the printout and the comparison.
            double pearsonMae = calcMAEByPearson(targetfullMatrix, targethadeMatrix, otherMatrix, k);
            double cosineMae = calcMAEByConsine(targetfullMatrix, targethadeMatrix, otherMatrix, k);
            System.out.println("PEARSON:" + pearsonMae);
            System.out.println("Cosine:" + cosineMae);
            if (pearsonMae > cosineMae) {
                System.out.println("cosine准确!");
            } else {
                System.out.println("person准确!");
            }
            System.out.println("———————————————————————————————————————————————");
        }
    }

    /**
     * Computes the MAE of the predictions obtained with Pearson similarity.
     *
     * @param targetfullMatrix target user's complete ratings (one row)
     * @param targethadeMatrix target user's partial ratings (one row); 0 marks an unrated item
     * @param otherMatrix      ratings of all other users, one row per user
     * @param i                number of nearest neighbours to use
     * @return the mean absolute error, or NaN when no item could be evaluated
     */
    public static double calcMAEByPearson(double[][] targetfullMatrix, double[][] targethadeMatrix, double[][] otherMatrix, int i) {
        return calcMAE(targetfullMatrix, targethadeMatrix, otherMatrix, i, PERASON_CALC);
    }

    /**
     * Computes the MAE of the predictions obtained with cosine similarity.
     *
     * @param targetfullMatrix target user's complete ratings (one row)
     * @param targethadeMatrix target user's partial ratings (one row); 0 marks an unrated item
     * @param otherMatrix      ratings of all other users, one row per user
     * @param i                number of nearest neighbours to use
     * @return the mean absolute error, or NaN when no item could be evaluated
     */
    public static double calcMAEByConsine(double[][] targetfullMatrix, double[][] targethadeMatrix, double[][] otherMatrix, int i) {
        return calcMAE(targetfullMatrix, targethadeMatrix, otherMatrix, i, CONSINE_CALC);
    }

    /**
     * Predicts the ratings missing from the partial target matrix and averages
     * the absolute difference to the corresponding entries of the full matrix.
     * Only positions where the two matrices differ, the partial matrix holds 0
     * and a meaningful prediction exists contribute to the average.
     *
     * @param targetfullMatrix target user's complete ratings
     * @param targethadeMatrix target user's partial ratings
     * @param otherMatrix      ratings of all other users
     * @param neighbourNum     number of nearest neighbours to use
     * @param calcMethod       {@link #PERASON_CALC} or {@link #CONSINE_CALC}
     * @return the mean absolute error; NaN (0.0 / 0.0) when no position qualified
     * @throws IllegalArgumentException if {@code calcMethod} is not a known selector
     */
    private static double calcMAE(double[][] targetfullMatrix, double[][] targethadeMatrix, double[][] otherMatrix, int neighbourNum, int calcMethod) {
        double[][] simMatrix;
        switch (calcMethod) {
            case PERASON_CALC:
                simMatrix = pearsonMatrixCalc(targethadeMatrix, otherMatrix);
                break;
            case CONSINE_CALC:
                simMatrix = cosineMatrixCalc(targethadeMatrix, otherMatrix);
                break;
            default:
                throw new IllegalArgumentException("Unknown similarity method: " + calcMethod);
        }

        int[][] neighbours = getNeighbourMatrixBySortedList(sortMatrix(simMatrix));
        double[][] predicted = getPreRatingsMatrix(targethadeMatrix, otherMatrix, simMatrix, neighbours, neighbourNum);

        double absoluteErrorSum = 0d; // numerator
        double evaluatedCount = 0d;   // denominator
        for (int i = 0; i < targetfullMatrix.length; i++) {
            for (int j = 0; j < targetfullMatrix[i].length; j++) {
                if (targetfullMatrix[i][j] == targethadeMatrix[i][j]) {
                    continue; // nothing was hidden at this position
                }
                if (targethadeMatrix[i][j] == 0 && predicted[i][j] != MEANINGLESS_VALUE) {
                    absoluteErrorSum += Math.abs(targetfullMatrix[i][j] - predicted[i][j]);
                    evaluatedCount++;
                }
            }
        }
        return absoluteErrorSum / evaluatedCount;
    }

    /**
     * Builds the predicted-rating row for the target user. Items the user has
     * already rated receive {@code MEANINGLESS_VALUE} (-1000); unrated items
     * (value 0) receive the result of {@link #calcPredictionScore}.
     *
     * @param targetMatrix target user's ratings; only row 0 is read
     * @param otherMatrix  ratings of all other users
     * @param simMatrix    similarity of the target user to every other user (row 0)
     * @param neighbour    indices of the other users ordered by similarity (row 0)
     * @param neighbourNum number of nearest neighbours to use
     * @return a 1 x (number of items in row 0) matrix of predictions
     */
    public static double[][] getPreRatingsMatrix(double[][] targetMatrix, double[][] otherMatrix, double[][] simMatrix, int[][] neighbour, int neighbourNum) {
        // Size the result from the data instead of a fixed column constant.
        int itemCount = targetMatrix[0].length;
        double[][] preRatings = new double[1][itemCount];
        for (int item = 0; item < itemCount; item++) {
            if (targetMatrix[0][item] == 0) {
                preRatings[0][item] = calcPredictionScore(item, targetMatrix, otherMatrix, simMatrix, neighbour, neighbourNum);
            } else {
                preRatings[0][item] = MEANINGLESS_VALUE;
            }
        }
        return preRatings;
    }

    /**
     * Predicts the target user's rating of item {@code i} as the user's own
     * mean rating plus the similarity-weighted mean of the neighbours'
     * deviations from their own mean ratings.
     *
     * @param i            index of the item to predict
     * @param targetMatrix target user's ratings; only row 0 is read
     * @param otherMatrix  ratings of all other users
     * @param simMatrix    similarity of the target user to every other user (row 0)
     * @param neighbour    indices of the other users ordered by similarity (row 0)
     * @param neighbourNum number of leading neighbours to consider; values larger
     *                     than the number of available neighbours are clamped
     * @return the predicted rating, or {@code MEANINGLESS_VALUE} when no usable
     *         neighbour rated the item
     */
    public static double calcPredictionScore(int i, double[][] targetMatrix, double[][] otherMatrix, double[][] simMatrix, int[][] neighbour, int neighbourNum) {
        double weightedDeviationSum = 0d; // numerator
        double similarityWeightSum = 0d;  // denominator

        double targetAverage = avgInMatrixRowWithout0(targetMatrix, 1);

        // Never read past the end of the neighbour list.
        int limit = Math.min(neighbourNum, neighbour[0].length);
        for (int j = 0; j < limit; j++) {
            int currentNeighbour = neighbour[0][j];
            double sim = simMatrix[0][currentNeighbour];
            // Skip neighbours without a usable similarity or without a rating for this item.
            if (sim == MEANINGLESS_VALUE || otherMatrix[currentNeighbour][i] == 0) {
                continue;
            }

            // avgInMatrixRowWithout0 takes a 1-based row number, hence the + 1.
            double deviation = otherMatrix[currentNeighbour][i] - avgInMatrixRowWithout0(otherMatrix, currentNeighbour + 1);

            weightedDeviationSum += sim * deviation;
            similarityWeightSum += Math.abs(sim);
        }

        return similarityWeightSum == 0 ? MEANINGLESS_VALUE : targetAverage + (weightedDeviationSum / similarityWeightSum);
    }

    /**
     * Averages the non-zero entries of one matrix row.
     *
     * @param matrix matrix to read
     * @param row    1-based number of the row to average
     * @return the mean of the non-zero entries; NaN (0.0 / 0) if the row has none
     */
    public static double avgInMatrixRowWithout0(double[][] matrix, int row) {
        double sum = 0;
        int divideCount = 0;
        for (double value : matrix[row - 1]) {
            if (value != 0) {
                sum += value;
                divideCount++;
            }
        }
        return sum / divideCount;
    }

    /**
     * Pairs every value in row 0 of the matrix with its column index and sorts
     * the pairs by value in descending order. The sort is stable and the pairs
     * are created in ascending index order, so equal values keep ascending
     * index order.
     *
     * @param matrix matrix whose first row is sorted
     * @return list of (column index, value) entries, largest value first
     */
    public static List<Map.Entry<Integer, Double>> sortMatrix(double[][] matrix) {
        List<Map.Entry<Integer, Double>> sortedList = new ArrayList<Map.Entry<Integer, Double>>(matrix[0].length);
        for (int i = 0; i < matrix[0].length; i++) {
            sortedList.add(new AbstractMap.SimpleEntry<Integer, Double>(i, matrix[0][i]));
        }

        Collections.sort(sortedList, new Comparator<Map.Entry<Integer, Double>>() {
            public int compare(Entry<Integer, Double> o1,
                               Entry<Integer, Double> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        return sortedList;
    }

    /**
     * Extracts the keys (column indices) of a sorted entry list into a
     * single-row matrix, preserving the list order.
     *
     * @param sortedList entries as returned by {@link #sortMatrix}
     * @return a 1 x {@code sortedList.size()} matrix of indices
     */
    public static int[][] getNeighbourMatrixBySortedList(List<Map.Entry<Integer, Double>> sortedList) {
        int[][] neighbourMatrix = new int[1][sortedList.size()];
        int position = 0;
        for (Entry<Integer, Double> entry : sortedList) {
            neighbourMatrix[0][position++] = entry.getKey();
        }
        return neighbourMatrix;
    }

    /**
     * Extracts the values of a sorted entry list into a single-row matrix,
     * preserving the list order.
     *
     * @param sortedList entries as returned by {@link #sortMatrix}
     * @return a 1 x {@code sortedList.size()} matrix of values
     */
    public static double[][] getSortedMatrixBySortedList(List<Map.Entry<Integer, Double>> sortedList) {
        double[][] sortedMatrix = new double[1][sortedList.size()];
        int position = 0;
        for (Entry<Integer, Double> entry : sortedList) {
            sortedMatrix[0][position++] = entry.getValue();
        }
        return sortedMatrix;
    }

    /**
     * Computes the Pearson similarity between the target user (row 0 of
     * {@code targetMatrix}) and every row of {@code otherMatrix}. The sums run
     * over the items both users rated (non-zero in both rows); each user's mean
     * is taken over all of that user's non-zero ratings.
     *
     * @param targetMatrix target user's ratings; only row 0 is read
     * @param otherMatrix  ratings of all other users
     * @return a 1 x {@code otherMatrix.length} matrix of similarities, holding
     *         {@code MEANINGLESS_VALUE} where either variance term is zero
     */
    public static double[][] pearsonMatrixCalc(double[][] targetMatrix, double[][] otherMatrix) {
        double[][] ret = new double[1][otherMatrix.length];

        // The target user's mean does not depend on the neighbour, so compute it once.
        double rx_avg = avgInMatrixRowWithout0(targetMatrix, 1);

        for (int i = 0; i < otherMatrix.length; i++) {
            double ry_avg = avgInMatrixRowWithout0(otherMatrix, i + 1);

            double covariance = 0; // numerator
            double varianceX = 0;  // left factor of the denominator, before the square root
            double varianceY = 0;  // right factor of the denominator, before the square root

            for (int j = 0; j < otherMatrix[i].length; j++) {
                if (targetMatrix[0][j] == 0 || otherMatrix[i][j] == 0) {
                    continue; // item not rated by both users
                }
                double devX = targetMatrix[0][j] - rx_avg;
                double devY = otherMatrix[i][j] - ry_avg;
                covariance += devX * devY;
                varianceX += devX * devX;
                varianceY += devY * devY;
            }

            if (varianceX == 0 || varianceY == 0) {
                // Division by zero: similarity is undefined.
                ret[0][i] = MEANINGLESS_VALUE;
            } else {
                ret[0][i] = covariance / (Math.sqrt(varianceX) * Math.sqrt(varianceY));
            }
        }
        return ret;
    }

    /**
     * Computes the cosine similarity between the target user (row 0 of
     * {@code targetMatrix}) and every row of {@code otherMatrix}, using only
     * the items both users rated (non-zero in both rows).
     *
     * @param targetMatrix target user's ratings; only row 0 is read
     * @param otherMatrix  ratings of all other users
     * @return a 1 x {@code otherMatrix.length} matrix of similarities, holding
     *         {@code MEANINGLESS_VALUE} where either squared norm is zero
     */
    public static double[][] cosineMatrixCalc(double[][] targetMatrix, double[][] otherMatrix) {
        double[][] ret = new double[1][otherMatrix.length];
        for (int i = 0; i < otherMatrix.length; i++) {
            double dotProduct = 0;
            double otherNormSq = 0;
            double targetNormSq = 0;
            for (int j = 0; j < otherMatrix[i].length; j++) {
                if (otherMatrix[i][j] != 0 && targetMatrix[0][j] != 0) {
                    dotProduct += otherMatrix[i][j] * targetMatrix[0][j];
                    otherNormSq += otherMatrix[i][j] * otherMatrix[i][j];
                    targetNormSq += targetMatrix[0][j] * targetMatrix[0][j];
                }
            }
            if (otherNormSq != 0 && targetNormSq != 0) {
                ret[0][i] = dotProduct / (Math.sqrt(otherNormSq) * Math.sqrt(targetNormSq));
            } else {
                ret[0][i] = MEANINGLESS_VALUE;
            }
        }
        return ret;
    }

    /**
     * Averages the entries of one matrix row, ignoring entries equal to a
     * given placeholder value.
     *
     * @param matrix         matrix to read
     * @param row            1-based number of the row to average
     * @param meaningLessNum placeholder value to exclude
     * @return the mean of the remaining entries; NaN (0.0 / 0) if none remain
     */
    public static double avgInMatrixRowWithoutMeaningLessNum(double[][] matrix, int row, int meaningLessNum) {
        double sum = 0;
        int divideCount = 0;
        for (double value : matrix[row - 1]) {
            if (value != meaningLessNum) {
                sum += value;
                divideCount++;
            }
        }
        return sum / divideCount;
    }

    /**
     * Prints each row of a two-dimensional array on its own line; does nothing
     * for {@code null}.
     *
     * @param arr array to print
     */
    public static void printArrays(double[][] arr) {
        if (arr == null) {
            return;
        }
        for (double[] row : arr) {
            System.out.println(Arrays.toString(row));
        }
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值