作用:用于对数据进行精炼(特征选择),过滤掉方差低的特征
1.定义及API
方差越小,说明该特征在各个样本上的取值越接近,区分样本的能力越弱、携带的信息越少,因此越没用。注意:方差衡量的是单个特征自身取值的离散程度,并不等于特征与特征之间的相关系数;特征间的相关度需要另外用相关系数(如皮尔逊相关系数)来衡量
代码实现:iloc[:, 1:-2] 表示取所有行,列从索引 1(第二列)开始,到索引 -2(倒数第二列)之前为止;切片右端不包含 -2,所以实际是去掉第一列和最后两列
threshold=10 的意思是:方差低于该阈值(不大于 10)的特征会被剔除,只保留方差大于 10 的特征。被剔除的特征取值变化小、信息量低(这与特征之间的相关性高低无关)
from sklearn.feature_selection import VarianceThreshold
import pandas as pd
def variance_demo(
    path=r"C:\Users\羊羊\Desktop\机器视觉\学习资料\黑马程序学习资料机器学习\02-代码\factor_returns.csv",
    threshold=10,
):
    """Filter low-variance features out of a CSV data set and print the result.

    The CSV at ``path`` is loaded with pandas, the first column and the last
    two columns are dropped, and the remaining columns are passed through
    scikit-learn's ``VarianceThreshold``. Both the sliced input and the
    transformed output (with its shape) are printed.

    :param path: Location of the CSV file to load. Defaults to the original
        hard-coded location so existing zero-argument calls keep working.
    :param threshold: Variance cut-off handed to ``VarianceThreshold``;
        defaults to 10 as in the original code.
    :return: None. Results are only printed.
    """
    # 1. Load the data.
    data = pd.read_csv(path)
    # Keep every row; keep columns from index 1 up to (not including) index -2,
    # i.e. drop the first column and the last two columns.
    # NOTE(review): presumably these are an id column and non-feature/target
    # columns -- confirm against the CSV header.
    data = data.iloc[:, 1:-2]
    print("data:\n", data)
    # 2. Instantiate the transformer.
    transfer = VarianceThreshold(threshold=threshold)
    # 3. Fit on the data and drop the columns rejected by the transformer.
    data_new = transfer.fit_transform(data)
    print("data_new:\n", data_new, data_new.shape)
# Run the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    variance_demo()
结果:
data:
pe_ratio pb_ratio market_cap ... earnings_per_share revenue total_expense
0 5.9572 1.1818 8.525255e+10 ... 2.0100 2.070140e+10 1.088254e+10
1 7.0289 1.5880 8.411336e+10 ... 0.3260 2.930837e+10 2.378348e+10
2 -262.7461 7.0003 5.170455e+08 ... -0.0060 1.167983e+07 1.203008e+07
3 16.4760 3.7146 1.968046e+10 ... 0.3500 9.189387e+09 7.935543e+09
4