相关系数的取值范围为[-1,1]:绝对值越接近1,相关性越强;越接近0,相关性越弱
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.feature_selection import VarianceThreshold
from scipy.stats import pearsonr
import pandas as pd
def Character():
    """Filter low-variance features and report one pairwise correlation.

    Reads ``simlization.txt`` with ``pandas.read_csv``, applies
    ``VarianceThreshold(threshold=5)`` to the first three columns and prints
    the filtered result, then prints the ``pearsonr`` result for the
    ``"milose"`` and ``"liters"`` columns.

    Returns:
        None. All results are written to standard output.
    """
    # 1. Load the data set.
    data = pd.read_csv("simlization.txt")

    # 2. Instantiate the transformer and filter the first three columns.
    new_data = data.iloc[:, :3]
    transfor = VarianceThreshold(threshold=5)
    # Keep the return value: fit_transform hands back a new array rather than
    # modifying its input, so discarding it meant the unfiltered columns were
    # what got printed.
    filtered = transfor.fit_transform(new_data)
    print("data:\n", filtered)

    # Correlation between two specific columns; pearsonr yields the
    # coefficient together with its p-value.
    r = pearsonr(data["milose"], data["liters"])
    print("相关性系数:\n", r)
    return None
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    Character()
处理方法:
1,两个相关性较高的因素,选取其中一个即可
2,加权平均
3,主成分分析