一、简介
在机器学习中,当需要预测不同机器学习算法在同一学习任务上的性能时,要使用秩相关系数对真实的性能排序与预测的性能排序进行比较。本文介绍其中一种秩相关系数——斯皮尔曼等级相关性(Spearman's rank correlation)。
公式:

$$\rho = 1 - \frac{6\sum_{i=1}^{n} d_i^{2}}{n\,(n^{2}-1)}$$
其中:$d_i = x_i - y_i$ 表示第 $i$ 个样本在两个排序中的等级之差;
$n$ 表示样本的大小,即机器学习算法的数量。
二、代码实现
1.计算斯皮尔曼等级相关性
- package recommendation.featureExtraction;
- import java.math.BigDecimal;
/**
 * Spearman rank correlation coefficient between two rankings of the same items.
 *
 * <p>Reference: http://en.wikipedia.org/wiki/Spearman_rank_correlation
 *
 * @author LiuKai 2014.07.06
 */
public class SpearmanRankCorrelation {

    /** Number of decimal places kept in the returned coefficient. */
    private static final int SCALE = 4;

    /**
     * Computes {@code rho = 1 - 6 * sum(d_i^2) / (n^3 - n)}, where
     * {@code d_i = base[i] - other[i]} and {@code n} is the common array length.
     * The result is rounded to four decimal places (half-up).
     *
     * <p>When both arrays are permutations of the same {@code n} distinct ranks the
     * result lies in {@code [-1, 1]}: 1 for identical rankings, -1 for reversed ones.
     *
     * <p>The method is static on purpose: it is a pure function of its arguments and
     * there is no realistic need for it to be polymorphic (the same argument "Clean
     * Code" makes about {@code Math.max}).
     *
     * @param base  the reference ranking
     * @param other the ranking to compare against {@code base}
     * @return the rounded coefficient; {@code 0.0} (after a message on stderr) when the
     *         two arrays differ in length
     * @throws IllegalArgumentException if the arrays have fewer than two elements,
     *         because the denominator {@code n^3 - n} would be zero
     */
    public static double correlation(int[] base, int[] other) {
        if (base.length != other.length) {
            // Kept as-is for backward compatibility: existing callers receive 0.0 here.
            System.err.println("The length of array base and other must be equal!");
            return 0.0;
        }
        final int n = base.length;
        if (n <= 1) {
            // Previously this printed a misleading message and called System.exit(-1),
            // terminating the whole JVM from inside a library method.
            throw new IllegalArgumentException(
                    "The length of both of the arrays must be greater than 1, but was " + n);
        }
        final double size = n;
        final double rho =
                1.0 - (6.0 * sumOfSquareDiff(base, other)) / (size * size * size - size);
        // valueOf() goes through the double's canonical decimal string, so the half-up
        // rounding is applied to the value as printed rather than to its binary expansion.
        return BigDecimal.valueOf(rho)
                .setScale(SCALE, java.math.RoundingMode.HALF_UP)
                .doubleValue();
    }

    /**
     * Returns the sum over all indices of {@code (a[i] - b[i])^2}.
     * Callers must pass arrays of equal length.
     */
    private static double sumOfSquareDiff(int[] a, int[] b) {
        double sum = 0.0;
        for (int i = 0; i < a.length; i++) {
            // Widen before subtracting so extreme int values cannot overflow.
            final double diff = (double) a[i] - b[i];
            sum += diff * diff;
        }
        return sum;
    }
}
- package recommendation.featureExtraction;
- /**
- * Test SpearmanRankCorrelation.class
- * @author Administrator
- *
- */
- public class TestSpearman {
- public static void main(String[] args) {
- int[] base = {1, 2, 3, 4, 5};
- int[] other1 = {1, 2, 3, 4, 5};
- int[] other2 = FisherYatesShuffle.shuffle(base.clone());
- int[] other3 = {5, 4, 3, 2, 1};
- System.out.println(SpearmanRankCorrelation.correlation(base, other1));
- System.out.println(SpearmanRankCorrelation.correlation(base, other2));
- System.out.println(SpearmanRankCorrelation.correlation(base, other3));
- }
- }
测试代码中用到的 Fisher-Yates 洗牌算法(FisherYatesShuffle)具体见另一篇博客:http://blog.csdn.net/lhkaikai/article/details/25627161
- package recommendation.featureExtraction;
- import java.util.Random;
- /*
- * Fisher-Yates shuffle, also known as the Knuth shuffle, is an algorithm for generating
- * a random permutation of a finite set-in plain terms, for randomly shuffling the set.
- */
- public class FisherYatesShuffle {
- public static int[] shuffle(int[] array) {
- for (int i = array.length - 1; i > 0; i--) {
- int rand = (new Random()).nextInt(i+1);
- int temp = array[i];
- array[i] = array[rand];
- array[rand] = temp;
- }
- return array;
- }
- public static void main(String[] args) {
- int[] array1= {1, 2, 3, 4, 5};
- int[] array2 = shuffle(array1);
- for(int elem: array2)
- System.out.print(elem + " ");
- System.out.println();
- }
- }