一、简介
在机器学习中,当需要预测不同的机器学习算法在同一个学习任务上的性能时,需要使用秩相关系数对真实的性能排序与预测的性能排序进行比较。本文介绍其中一种秩相关系数——斯皮尔曼等级相关系数(Spearman rank correlation coefficient)。
公式:
$$\rho = 1 - \frac{6\sum_{i=1}^{n} d_i^{2}}{n\,(n^{2}-1)}$$
其中:$d_i = x_i - y_i$ 表示第 $i$ 个算法在两个排序中的名次之差;
$n$ 表示样本的大小,即机器学习算法的数量。
二、代码实现
1.计算斯皮尔曼等级相关性
package recommendation.featureExtraction;
import java.math.BigDecimal;
import java.math.RoundingMode;
/**
 * Spearman rank correlation coefficient between two rankings.
 *
 * <p>Uses the simplified formula {@code rho = 1 - 6 * sum(d_i^2) / (n^3 - n)}, where
 * {@code d_i} is the difference between the two ranks of item {@code i} and {@code n}
 * is the number of ranked items. The inputs are used as ranks directly; this class does
 * not convert raw scores into ranks. The simplified formula is exact only when the ranks
 * within each array are distinct (no ties).
 *
 * <p>Reference: http://en.wikipedia.org/wiki/Spearman_rank_correlation
 *
 * @author LiuKai 2014.07.06
 */
public class SpearmanRankCorrelation {

    /** Number of decimal places kept in the returned coefficient. */
    private static final int SCALE = 4;

    /**
     * Computes the Spearman rank correlation of two equally long rank arrays, rounded
     * half-up to four decimal places.
     *
     * <p>When both arrays are permutations of the same distinct ranks the result lies in
     * {@code [-1, 1]}: {@code 1.0} for identical rankings and {@code -1.0} for exactly
     * reversed rankings.
     *
     * <p>The method is static on purpose: it is a pure function of its arguments and there
     * is no reason for it to be polymorphic.
     *
     * @param base  reference ranking
     * @param other ranking to compare against {@code base}
     * @return the rounded coefficient, or {@code 0.0} (after printing a message to
     *         {@code System.err}) when the two arrays differ in length
     * @throws IllegalArgumentException if the arrays have fewer than two elements, because
     *         the denominator {@code n^3 - n} would be zero
     * @throws NullPointerException if either array is {@code null}
     */
    public static double correlation(int[] base, int[] other) {
        if (base.length != other.length) {
            // Kept for backward compatibility: callers receive 0.0 plus a diagnostic.
            System.err.println("The length of array base and other must be equal!");
            return 0.0;
        }
        if (base.length <= 1) {
            // A library method must not terminate the JVM; report the bad input instead.
            throw new IllegalArgumentException(
                    "The length of both of the arrays must be greater than 1, but was "
                            + base.length);
        }

        double n = base.length;
        double rho = 1.0 - 6.0 * sumOfSquareDiff(base, other) / (n * n * n - n);

        // valueOf uses the canonical decimal text of the double, so rounding is applied to
        // the value as printed rather than to its exact binary expansion.
        return BigDecimal.valueOf(rho).setScale(SCALE, RoundingMode.HALF_UP).doubleValue();
    }

    /**
     * Returns the sum of squared element-wise differences of {@code a} and {@code b}.
     * The subtraction is carried out in {@code double} so that extreme {@code int} values
     * cannot overflow.
     */
    private static double sumOfSquareDiff(int[] a, int[] b) {
        double sum = 0.0;
        for (int i = 0; i < a.length; i++) {
            double diff = (double) a[i] - b[i];
            sum += diff * diff;
        }
        return sum;
    }
}
2.测试斯皮尔曼等级相关系数
package recommendation.featureExtraction;
/**
 * Command-line demo for {@code SpearmanRankCorrelation}.
 *
 * <p>Compares the reference ranking {@code 1..5} with an identical ranking, a randomly
 * shuffled copy, and the reversed ranking, printing one coefficient per line in that order.
 *
 * @author Administrator
 */
public class TestSpearman {
    public static void main(String[] args) {
        final int[] reference = {1, 2, 3, 4, 5};

        // The shuffle works on a clone so the reference ranking itself stays untouched.
        final int[][] candidates = {
            {1, 2, 3, 4, 5},
            FisherYatesShuffle.shuffle(reference.clone()),
            {5, 4, 3, 2, 1},
        };

        for (int[] candidate : candidates) {
            System.out.println(SpearmanRankCorrelation.correlation(reference, candidate));
        }
    }
}
3.辅助类费雪耶兹随机置乱算法
具体见另一篇博客:http://blog.csdn.net/lhkaikai/article/details/25627161
package recommendation.featureExtraction;
import java.util.Random;
/*
* Fisher-Yates shuffle, also known as the Knuth shuffle, is an algorithm for generating
* a random permutation of a finite set-in plain terms, for randomly shuffling the set.
*/
public class FisherYatesShuffle {
public static int[] shuffle(int[] array) {
for (int i = array.length - 1; i > 0; i--) {
int rand = (new Random()).nextInt(i+1);
int temp = array[i];
array[i] = array[rand];
array[rand] = temp;
}
return array;
}
public static void main(String[] args) {
int[] array1= {1, 2, 3, 4, 5};
int[] array2 = shuffle(array1);
for(int elem: array2)
System.out.print(elem + " ");
System.out.println();
}
}