// 期望值,方差,标准差,协方差,相关系数 — expected value, variance, standard deviation, covariance, correlation coefficient
package com.chipmunk.java.base;
/**
 * Basic descriptive-statistics helpers for {@code double} arrays: arithmetic mean,
 * population variance, standard deviation, expectation, covariance and the
 * Pearson correlation coefficient.
 *
 * <p>All methods are static and never modify the arrays passed to them. Passing
 * {@code null} for any array results in a {@link NullPointerException}.
 */
public class AlgorithmUtil {

    /**
     * Returns the arithmetic mean of the given values.
     *
     * @param arr the values to average
     * @return the sum of the values divided by their count, or {@code NaN} when
     *         {@code arr} is empty
     */
    public static double getAverageValue(double[] arr) {
        double sum = 0;
        for (double value : arr) {
            sum += value;
        }
        return sum / arr.length;
    }

    /**
     * Returns the population variance of the given values, i.e. the mean of the
     * squared deviations from the arithmetic mean (divisor {@code n}).
     *
     * @param arr the values
     * @return the population variance, or {@code NaN} when {@code arr} is empty
     */
    public static double getVariance(double[] arr) {
        double mean = getAverageValue(arr);
        double squaredDeviationSum = 0;
        for (double value : arr) {
            double deviation = value - mean;
            // Plain multiplication keeps this numerically identical to
            // getCovariance(arr, arr), so Cov(X, X) == D(X) holds exactly.
            squaredDeviationSum += deviation * deviation;
        }
        // Population variance divides by n; a sample variance would divide by n - 1.
        return squaredDeviationSum / arr.length;
    }

    /**
     * Returns the population standard deviation (root mean square deviation),
     * the square root of {@link #getVariance(double[])}.
     *
     * @param arr the values
     * @return the standard deviation, or {@code NaN} when {@code arr} is empty
     */
    public static double getStandardDeviation(double[] arr) {
        return Math.sqrt(getVariance(arr));
    }

    /**
     * Returns the expectation {@code E(X) = sum over i of x_i * p_i}.
     *
     * <p>The probabilities are used as given; this method does not check that
     * they are non-negative or sum to 1.
     *
     * @param arr_x the values x_i
     * @param arr_p the probability p_i of each value, index-aligned with {@code arr_x}
     * @return the weighted sum; {@code 0} when both arrays are empty
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    public static double getExpectation(double[] arr_x, double[] arr_p) {
        requireSameLength(arr_x, arr_p);
        double expectation = 0;
        for (int i = 0; i < arr_x.length; i++) {
            expectation += arr_x[i] * arr_p[i];
        }
        return expectation;
    }

    /**
     * Returns the population covariance {@code cov(X, Y) = E[(X - E[X]) * (Y - E[Y])]},
     * where each expectation is the arithmetic mean of the paired observations.
     *
     * <p>This is algebraically equal to {@code E[XY] - E[X]E[Y]}, but the centered
     * form is used because subtracting two large, nearly equal products loses
     * precision when the values are large compared to their spread. As a
     * consequence {@code getCovariance(a, a)} equals {@code getVariance(a)}.
     *
     * @param arr_a the observations of X
     * @param arr_b the observations of Y, index-aligned with {@code arr_a}
     * @return the population covariance, or {@code NaN} when the arrays are empty
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    public static double getCovariance(double[] arr_a, double[] arr_b) {
        requireSameLength(arr_a, arr_b);
        double meanA = getAverageValue(arr_a);
        double meanB = getAverageValue(arr_b);
        double productSum = 0;
        for (int i = 0; i < arr_a.length; i++) {
            productSum += (arr_a[i] - meanA) * (arr_b[i] - meanB);
        }
        return productSum / arr_a.length;
    }

    /**
     * Returns the Pearson correlation coefficient
     * {@code r = cov(X, Y) / (sd(X) * sd(Y))}.
     *
     * <p>The result lies in {@code [-1, 1]}: {@code r > 0} indicates positive
     * correlation, {@code r < 0} negative correlation, and {@code |r|} the strength
     * of the linear relationship ({@code 1} / {@code -1} are perfect positive /
     * negative correlation, {@code 0} is uncorrelated). As a rule of thumb,
     * {@code |r| > 0.8} is usually regarded as strong linear correlation.
     *
     * @param arr_a the observations of X
     * @param arr_b the observations of Y, index-aligned with {@code arr_a}
     * @return the correlation coefficient, or {@code NaN} when it is undefined
     *         (empty input, or either input has zero variance)
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    public static double getCorrelationCoefficient(double[] arr_a, double[] arr_b) {
        double covariance = getCovariance(arr_a, arr_b);
        // Multiply the standard deviations instead of taking sqrt(D(X) * D(Y)):
        // the product of two variances can overflow or underflow on its own.
        double denominator = getStandardDeviation(arr_a) * getStandardDeviation(arr_b);
        if (denominator == 0) {
            // A constant series has no defined correlation with anything.
            return Double.NaN;
        }
        double r = covariance / denominator;
        // Rounding can push |r| marginally past 1; clamp it (NaN passes through unchanged).
        return Math.max(-1.0, Math.min(1.0, r));
    }

    /** Rejects paired inputs whose lengths differ, since they cannot be index-aligned. */
    private static void requireSameLength(double[] first, double[] second) {
        if (first.length != second.length) {
            throw new IllegalArgumentException(
                    "arrays must have the same length: " + first.length + " != " + second.length);
        }
    }

    /** Small demo: prints the covariance and correlation coefficient of two sample series. */
    public static void main(String[] args) {
        double[] arr1 = new double[]{1.2, 2.3, 4.2, 6.3, 2.6};
        double[] arr2 = new double[]{1.2, 1.3, 1.2, 1.3, 1.6};
        System.out.println(getCovariance(arr1, arr2));
        System.out.println(getCorrelationCoefficient(arr1, arr2));
    }
}