1、低方差特征过滤
2、API:sklearn.feature_selection.VarianceThreshold(threshold=0.0),删除方差低于阈值的特征;默认只删除方差为 0(即所有样本取值相同)的特征
ex_1
import pandas as pd
from sklearn.feature_selection import VarianceThreshold
def variance_demo(path: str = "factor_returns.csv", threshold: float = 0.0) -> None:
    """Filter out low-variance features and print the data before and after.

    Args:
        path: CSV file to load. Defaults to ``"factor_returns.csv"``, the
            file name this demo originally hard-coded.
        threshold: Variance threshold passed to ``VarianceThreshold``.
            The default of ``0.0`` matches calling ``VarianceThreshold()``
            with no arguments, so a no-argument call behaves as before.

    Returns:
        None. The original and the filtered data are only printed.
    """
    data = pd.read_csv(path)
    # Keep every row; drop the first column and the last two columns.
    # NOTE(review): presumably these are an id column and non-feature
    # columns -- confirm against the CSV layout.
    data = data.iloc[:, 1:-2]

    transfer = VarianceThreshold(threshold=threshold)
    data_new = transfer.fit_transform(data)

    print("data: \n", data)
    print("\n")
    print("data_new:\n", data_new)
    return None
3、相关系数
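1)公式
皮尔逊相关系数:$r = \dfrac{\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})}{\sqrt{\sum_{i=1}^{n}(x_i-\bar{x})^{2}}\,\sqrt{\sum_{i=1}^{n}(y_i-\bar{y})^{2}}}$
用于寻找哪两个特征相关性强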
2)特点:
相关系数 r 的取值范围为 -1 ≤ r ≤ 1;r > 0 表示正相关,r < 0 表示负相关;
|r| 越接近 1 线性相关性越强,越接近 0 线性相关性越弱
3)API:scipy.stats.pearsonr(x, y),返回相关系数和对应的 p 值
ex_2
import pandas as pd
from sklearn.feature_selection import VarianceThreshold
from scipy.stats import pearsonr
def variance_demo(path: str = "factor_returns.csv", threshold: float = 0.0) -> None:
    """Filter out low-variance features, then print a Pearson correlation.

    Loads the CSV, applies ``VarianceThreshold`` to the selected columns,
    prints the data before and after filtering, and finally prints the
    result of ``pearsonr`` for the ``pe_ratio`` and ``pb_ratio`` columns.

    Args:
        path: CSV file to load. Defaults to ``"factor_returns.csv"``, the
            file name this demo originally hard-coded.
        threshold: Variance threshold passed to ``VarianceThreshold``.
            The default of ``0.0`` matches calling ``VarianceThreshold()``
            with no arguments, so a no-argument call behaves as before.

    Returns:
        None. All results are only printed.
    """
    data = pd.read_csv(path)
    # Keep every row; drop the first column and the last two columns.
    # NOTE(review): presumably these are an id column and non-feature
    # columns -- confirm against the CSV layout.
    data = data.iloc[:, 1:-2]

    transfer = VarianceThreshold(threshold=threshold)
    data_new = transfer.fit_transform(data)

    print("data: \n", data)
    print("")
    print("data_new:\n", data_new)

    # The correlation is computed on the sliced (unfiltered) frame, so both
    # columns must survive the iloc slice above.
    # pearsonr returns the coefficient together with a p-value; the whole
    # result object is printed, not just the coefficient.
    r = pearsonr(data["pe_ratio"], data["pb_ratio"])
    print("相关系数:\n", r)
    return None