pearsonr(x,y)、corr()、corrcoef(u1) 相关系数计算

函数:pearsonr(x,y)

功能:
计算特征与目标变量之间的相关度

参数说明:
1)输入:x为特征,y为目标变量.
2)输出:r: 相关系数 [-1,1]之间,p-value: p值。
注: p值越小,表示相关系数越显著,一般p值在500个样本以上时有较高的可靠性。

pearson相关系数的计算公式为:
在这里插入图片描述
cov(X,Y)表示的是协方差
var(x)和var(y)表示的是方差

python实现

import numpy as np
from scipy.stats import pearsonr
import random
np.random.seed(0)
size=300
x=np.random.normal(0,1,size)
print "Lower noise",pearsonr(x,x+np.random.normal(0,1,size))
print "Higher noise",pearsonr(x,x+np.random.normal(0,10,size))

输出:

Lower noise (0.71824836862138408, 7.3240173129983507e-49)
Higher noise (0.057964292079338155, 0.31700993885324752)

用pandas计算相关系数

corr()求解变量相关系数

df.head()
	Guba	XQ		BCI		Count	Value
0	0.021	0.098	0.175	0.077	0.057
1	0.031	0.097	0.192	0.087	0.069
2	0.018	0.101	0.193	0.075	0.069
3	0.017	0.112	0.203	0.077	0.063
4	0.042	0.158	0.222	0.335	0.567

#1.pearson相关系数
df.corr()
		Guba		XQ			BCI			Count		Value
Guba	1.000000	0.175604	-0.014611	0.200896	0.256166
XQ		0.175604	1.000000	-0.390358	0.654250	0.482809
BCI		-0.014611	-0.390358	1.000000	-0.259319	-0.156440
Count	0.200896	0.654250	-0.259319	1.000000	0.832961
Value	0.256166	0.482809	-0.156440	0.832961	1.000000

#2.Kendall Tau相关系数
df.corr('kendall')
		Guba		XQ			BCI			Count		Value
Guba	1.000000	0.153904	-0.012438	0.133122	0.090707
XQ		0.153904	1.000000	-0.244304	0.374908	0.255377
BCI		-0.012438	-0.244304	1.000000	-0.157442	-0.091950
Count	0.133122	0.374908	-0.157442	1.000000	0.720916
Value	0.090707	0.255377	-0.091950	0.720916	1.000000

#3.spearman秩相关
df.corr('spearman')
		Guba		XQ			BCI			Count		Value
Guba	1.000000	0.219124	-0.017204	0.189752	0.143163
XQ		0.219124	1.000000	-0.358981	0.563938	0.427756
BCI		-0.017204	-0.358981	1.000000	-0.241880	-0.140010
Count	0.189752	0.563938	-0.241880	1.000000	0.877732
Value	0.143163	0.427756	-0.140010	0.877732	1.000000

#4.显著性检验
import scipy.stats as stats
#输出结果第一个值为pearsonr相关系数,
#第二个为p-value,所以这里Guba列和Value值是显著相关的
stats.pearsonr(df['Guba'],df['Value'])
(0.256165703418037, 8.10519823509109e-07)

计算两列数据相关系数

import pandas as pd
import pylab as plt
#每小时的阵风风速平均值
all_gust_spd_mean_list = [8.21529411764706, 7.872941176470587, 7.829411764705882, 8.354117647058825, 9.025882352941174, 9.384523809523811, 9.57294117647059, 9.274117647058821, 9.050588235294118, 9.314117647058827, 8.924705882352939, 9.25176470588235, 8.978823529411764, 8.39176470588235, 7.715294117647061, 7.477647058823529, 7.272941176470586, 7.38470588235294, 7.396470588235295, 7.97261904761905, 7.716666666666666, 7.7809523809523835, 7.816666666666668, 7.897590361445783, 8.200000000000001, 8.04761904761905, 7.474999999999999, 9.855952380952383, 11.120000000000001, 10.979761904761906, 10.922619047619051, 10.841176470588234, 9.31566265060241, 8.867058823529415, 9.068235294117642, 8.774698795180722, 8.629411764705884, 8.292941176470586, 7.640000000000007, 7.422352941176469, 7.464705882352944, 8.210588235294113, 8.558823529411763, 8.93095238095238, 9.001176470588234, 8.538095238095238, 8.965882352941172, 9.855294117647057, 8.318918918918921, 9.217647058823525, 8.86470588235294, 8.840000000000002, 9.44235294117647, 9.352380952380953, 9.307058823529408, 9.64047619047619, 9.408333333333333, 9.585882352941175, 8.901190476190477, 7.698823529411764, 7.988235294117645, 9.091764705882353, 9.294117647058819, 8.996470588235297, 9.63764705882353, 9.091764705882353, 8.937647058823533, 8.838823529411764, 8.637647058823534, 8.46, 8.374117647058824, 8.24117647058823, 8.245238095238093, 8.365882352941174, 8.50235294117647, 8.291764705882352, 8.088235294117647, 7.889411764705883, 7.594117647058826, 7.216470588235293, 7.097647058823533, 7.305882352941181, 7.489411764705882, 6.815294117647058, 7.971428571428569, 7.424705882352936, 6.910588235294117, 6.071764705882354, 7.44117647058823, 7.667857142857143, 7.881176470588237, 7.929411764705881, 8.12142857142857, 8.822352941176472, 9.083529411764703, 9.028235294117646, 9.310714285714285, 9.035294117647057, 8.450588235294116, 8.414285714285713, 7.311764705882355, 6.840000000000001, 7.238095238095239, 6.641176470588236, 6.8047619047619055, 6.58705882352941, 6.826190476190474, 6.568235294117643, 7.060000000000001, 7.686904761904761, 8.348235294117643, 8.503529411764701, 8.287058823529414, 8.354117647058823, 7.624705882352941, 7.286904761904765, 7.361176470588235, 7.477647058823531, 7.343529411764706]
 
#每小时的阵风风向标准差
all_gust_agl_dev_list = [0.7507438242046189, 0.768823513771462, 0.849877567310481, 0.8413581558472801, 0.8571319461950748, 0.8665002025305942, 0.9053739533298005, 0.8866979720735791, 0.8045677876888446, 0.873463882661469, 0.832383480871403, 0.778659970340069, 0.7357031045047981, 0.7974723911258534, 0.8039727543149432, 0.8709723763624072, 0.8727745464337923, 0.7896422160341138, 0.8165093346129041, 0.8821296270775546, 0.9193591477905156, 0.8546566314487358, 0.8595040204296921, 0.8075641299052398, 0.7996745617071098, 0.7930869411601498, 0.7578880032016914, 0.9107571156507569, 0.8461201382346486, 0.7553646348127085, 0.8510861123303187, 0.7282631202385544, 0.8588017730198183, 0.7923449370076744, 0.8265083209111689, 0.9599970229643688, 0.8195276021290412, 0.7882592259148272, 0.8036464793287409, 0.8237184691421926, 0.8846862360656914, 0.8136869244513337, 0.8516383375155133, 0.7760301715652644, 0.8644231334629017, 0.831330440569484, 0.8061342111854616, 0.7345896810176235, 1.205089147978776, 0.8266315966774649, 0.8137345300107962, 0.8186966603954983, 0.7836182115343135, 0.8406438908681332, 0.7717723331806998, 0.7932664155269176, 0.7266183593077442, 0.719063143819583, 0.8846434855533486, 0.817552510948495, 0.7571575934024827, 0.865326265251608, 0.9099784335052563, 0.8591794583996128, 0.9295389095340467, 0.8787300860744375, 0.8724277968300532, 0.95284132003256, 0.9288772059881606, 0.8690944948691984, 0.8327213470469693, 0.8339075062700629, 0.886835675339985, 0.8439137877550847, 0.7985495396895048, 0.8406267016063169, 0.8477871130878305, 0.8844025576348077, 0.9186363354492758, 0.8888539157167654, 0.9079462071375304, 0.8699806402308554, 0.8531937701209343, 0.8833108936555343, 0.9317958602705915, 0.9393618445471649, 0.9556065912926689, 0.967220118643412, 0.8882194173154115, 0.9361538853249073, 0.7872261833965604, 0.8608377368219552, 0.8787718518619395, 0.8169189082396561, 0.7965901553530427, 0.8838665737610132, 0.8844338861256802, 0.9008484784943429, 0.8612318707072047, 0.8623792153658019, 1.0033494995180463, 0.9901213381586231, 0.8780115045650467, 0.9172682690843976, 0.9653905755824115, 0.9199829176728873, 0.9180048223906779, 0.9172043382441968, 0.9267783259554074, 0.9231225672912022, 0.7945054721199195, 0.8655558517080688, 0.8306327906597787, 0.8457559701865576, 0.8038459124570336, 0.8519646989317945, 0.7735358658599594, 0.8612134954656397, 0.8879135146161856]
 
g_s_m = pd.Series(all_gust_spd_mean_list) #利用Series将列表转换成新的、pandas可处理的数据
g_a_d = pd.Series(all_gust_agl_dev_list)
 
corr_gust = round(g_s_m.corr(g_a_d), 4) #计算标准差,round(a, 4)是保留a的前四位小数
 
print('corr_gust :', corr_gust)
 
#最后画一下两列表散点图,直观感受下,结合相关系数揣摩揣摩
plt.scatter(all_gust_spd_mean_list, all_gust_agl_dev_list)
plt.title('corr_gust :' + str(corr_gust), fontproperties='SimHei') #给图写上title
plt.show()

根据以上程序,得到结果:

corr_gust : -0.3481

计算矩阵数据相关系数矩阵

import pandas as pd
import numpy as np
 
if __name__ == '__main__':
    unstrtf_lst = [[2.136, 1.778, 1.746, 2.565, 1.873, 2.413, 1.813, 1.72, 1.932, 1.987, 2.035, 2.178, 2.05, 2.016, 1.645, 1.756, 1.886, 2.106, 2.138, 1.914, 1.984, 1.906, 1.871, 1.939, 1.81, 1.93, 1.898, 1.802, 2.008, 1.724, 1.823, 1.636, 1.774, 2.055, 1.934, 1.629, 2.519, 2.093, 2.004, 1.793, 1.564, 1.962, 2.176, 1.846, 1.816, 2.018, 1.708, 2.465, 1.899, 1.523, 1.41, 2.102, 2.065, 2.402, 2.091, 1.867, 1.77, 1.466, 2.029, 1.659, 1.626, 1.977, 1.837, 2.13, 2.241, 2.184, 2.345, 1.833, 2.113, 1.764, 1.859, 1.868, 1.835, 1.906, 2.237, 1.846, 1.871, 1.769, 1.928, 1.831, 1.875, 2.039, 2.24, 1.835, 1.851]
    , [2.171, 1.831, 1.714, 2.507, 1.793, 2.526, 1.829, 1.705, 1.954, 2.017, 2.022, 2.16, 2.059, 1.966, 1.661, 1.752, 1.884, 2.203, 2.182, 1.97, 2.003, 1.875, 1.852, 1.884, 1.774, 1.916, 1.936, 1.809, 1.926, 1.717, 1.841, 1.59, 1.781, 2.016, 1.898, 1.657, 2.458, 2.134, 2.032, 1.785, 1.575, 1.959, 2.11, 1.854, 1.826, 1.992, 1.706, 2.419, 1.854, 1.514, 1.37, 2.084, 2.024, 2.398, 1.955, 1.859, 1.759, 1.441, 2.059, 1.653, 1.583, 1.987, 1.84, 2.106, 2.262, 2.13, 2.371, 1.776, 2.117, 1.733, 1.814, 1.839, 1.822, 1.883, 2.23, 1.803, 1.894, 1.783, 1.911, 1.813, 1.85, 2.004, 2.191, 1.823, 1.809]
    , [2.157, 1.873, 1.802, 2.761, 1.733, 2.506, 1.842, 1.765, 1.938, 2.058, 1.932, 2.196, 2.004, 2.126, 1.664, 1.698, 1.899, 2.073, 2.117, 2.083, 1.972, 1.969, 1.865, 1.937, 1.752, 1.939, 1.927, 1.804, 2.07, 1.725, 1.846, 1.5, 1.804, 2.1, 1.932, 1.773, 2.431, 2.088, 2.08, 1.812, 1.592, 1.953, 2.044, 2.019, 1.846, 2.061, 1.771, 2.254, 1.891, 1.536, 1.356, 1.952, 2.222, 2.427, 2.015, 1.873, 1.79, 1.384, 1.981, 1.665, 1.815, 2.006, 1.869, 2.102, 2.249, 2.27, 2.296, 1.814, 2.099, 1.702, 1.688, 1.89, 1.82, 1.927, 2.162, 1.825, 1.998, 1.811, 2.0, 1.842, 1.793, 2.115, 2.301, 1.789, 1.826]
    , [2.127, 1.744, 1.747, 2.548, 1.939, 2.296, 1.808, 1.71, 1.901, 1.906, 2.074, 2.167, 2.113, 2.044, 1.632, 1.821, 1.94, 2.076, 2.114, 1.837, 1.978, 1.904, 1.872, 1.98, 1.886, 1.923, 1.875, 1.799, 1.992, 1.704, 1.812, 1.715, 1.756, 2.061, 1.94, 1.554, 2.592, 2.065, 1.983, 1.802, 1.57, 1.955, 2.215, 1.765, 1.796, 2.006, 1.662, 2.573, 1.915, 1.543, 1.439, 2.16, 2.012, 2.42, 2.268, 1.886, 1.767, 1.527, 2.073, 1.65, 1.567, 2.016, 1.819, 2.153, 2.225, 2.237, 2.327, 1.877, 2.115, 1.804, 1.939, 1.867, 1.84, 1.905, 2.302, 1.883, 1.798, 1.725, 1.893, 1.846, 1.916, 2.025, 2.268, 1.867, 1.877]
    , [2.089, 1.664, 1.72, 2.441, 2.031, 2.321, 1.773, 1.702, 1.935, 1.968, 2.119, 2.191, 2.023, 1.925, 1.621, 1.75, 1.822, 2.074, 2.139, 1.764, 1.982, 1.873, 1.895, 1.955, 1.829, 1.945, 1.853, 1.794, 2.046, 1.75, 1.793, 1.741, 1.752, 2.042, 1.965, 1.532, 2.598, 2.086, 1.923, 1.771, 1.517, 1.98, 2.338, 1.743, 1.794, 2.014, 1.693, 2.618, 1.938, 1.5, 1.476, 2.216, 2.003, 2.361, 2.13, 1.85, 1.764, 1.513, 2.001, 1.669, 1.538, 1.897, 1.819, 2.163, 2.226, 2.099, 2.386, 1.865, 2.121, 1.818, 2.0, 1.876, 1.858, 1.908, 2.254, 1.874, 1.791, 1.759, 1.908, 1.822, 1.944, 2.012, 2.201, 1.863, 1.892]
    ]
 
    column_lst = ['whole_year', 'spring', 'summer', 'autumn', 'winter']
 
    # 计算列表两两间的相关系数
    data_dict = {} # 创建数据字典,为生成Dataframe做准备
    for col, gf_lst in zip(column_lst, unstrtf_lst):
        data_dict[col] = gf_lst
 
    unstrtf_df = pd.DataFrame(data_dict)
    cor1 = unstrtf_df.corr() # 计算相关系数,得到一个矩阵
    print(cor1)
    print(unstrtf_df.columns.tolist())

结果如下:

            whole_year    spring    summer    autumn    winter
whole_year    1.000000  0.986011  0.943254  0.980358  0.965415
spring        0.986011  1.000000  0.944394  0.945710  0.930887
summer        0.943254  0.944394  1.000000  0.876008  0.833568
autumn        0.980358  0.945710  0.876008  1.000000  0.977426
winter        0.965415  0.930887  0.833568  0.977426  1.000000

用numpy计算相关系数

import numpy as np
# 这里u1是一个矩阵,可以自己构造,也可以来自dataframe类型:比如u1=a_df.values
np.corrcoef(u1) # 计算矩阵所有行的相关系数
np.corrcoef(u1.T) # 计算矩阵所有列的相关系数
np.around(np.corrcoef(u1), decimals=3) # 这里是将矩阵结果保留3位小数
  • 6
    点赞
  • 27
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值