数据预处理:标准化;特征降维:删除低方差特征与计算相关系数

def stand_demo():
    """Standardize the first three columns of ``dating.txt`` and print them.

    Loads ``dating.txt`` with pandas, keeps the first three columns by
    position, fits a ``StandardScaler`` on them and prints the transformed
    array.

    Returns:
        None. The result is only printed.
    """
    # Positional slice: every row, columns 0, 1 and 2.
    features = pd.read_csv("dating.txt").iloc[:, :3]
    scaler = StandardScaler()
    scaled = scaler.fit_transform(features)
    print(scaled)
[[0.44832535 0.39805139 0.56233353]
 [0.15873259 0.34195467 0.98724416]
 [0.28542943 0.06892523 0.47449629]
 ...
 [0.29115949 0.50910294 0.51079493]
 [0.52711097 0.43665451 0.4290048 ]
 [0.47940793 0.3768091  0.78571804]]
def variance_demo():
    """Run ``VarianceThreshold(threshold=0)`` on ``factor_returns.csv``.

    Loads the CSV with pandas, keeps the columns from position 1 up to (but
    not including) position -2, fits the selector on them and prints the
    transformed array together with its shape.
    """
    frame = pd.read_csv("factor_returns.csv")
    # Positional slice: column 1 is included, column -2 is excluded.
    features = frame.iloc[:, 1:-2]
    selector = VarianceThreshold(threshold=0)
    selected = selector.fit_transform(features)
    # ``shape`` is an attribute, so it is read without parentheses.
    print(selected, selected.shape)


if __name__ == "__main__":
    variance_demo()
[[ 5.95720000e+00  1.18180000e+00  8.52525509e+10 ...  2.01000000e+00
   2.07014010e+10  1.08825400e+10]
 [ 7.02890000e+00  1.58800000e+00  8.41133582e+10 ...  3.26000000e-01
   2.93083692e+10  2.37834769e+10]
 [-2.62746100e+02  7.00030000e+00  5.17045520e+08 ... -6.00000000e-03
   1.16798290e+07  1.20300800e+07]
 ...
 [ 3.95523000e+01  4.00520000e+00  1.70243430e+10 ...  2.20000000e-01
   1.78908166e+10  1.74929478e+10]
 [ 5.25408000e+01  2.46460000e+00  3.28790988e+10 ...  1.21000000e-01
   6.46539204e+09  6.00900728e+09]
 [ 1.42203000e+01  1.41030000e+00  5.91108572e+10 ...  2.47000000e-01
   4.50987171e+10  4.13284212e+10]] (2318, 9)

 

def variance_demo():
    """Run ``VarianceThreshold(threshold=10)`` on ``factor_returns.csv``.

    Loads the CSV with pandas, keeps the columns from position 1 up to (but
    not including) position -2, fits the selector on them and prints the
    transformed array together with its shape.
    """
    frame = pd.read_csv("factor_returns.csv")
    # Positional slice: column 1 is included, column -2 is excluded.
    features = frame.iloc[:, 1:-2]
    selector = VarianceThreshold(threshold=10)
    selected = selector.fit_transform(features)
    # ``shape`` is an attribute, so it is read without parentheses.
    print(selected, selected.shape)


if __name__ == "__main__":
    variance_demo()
[[ 5.95720000e+00  8.52525509e+10  8.00800000e-01 ...  1.21144486e+12
   2.07014010e+10  1.08825400e+10]
 [ 7.02890000e+00  8.41133582e+10  1.64630000e+00 ...  3.00252062e+11
   2.93083692e+10  2.37834769e+10]
 [-2.62746100e+02  5.17045520e+08 -5.67800000e-01 ...  7.70517753e+08
   1.16798290e+07  1.20300800e+07]
 ...
 [ 3.95523000e+01  1.70243430e+10  3.34400000e+00 ...  2.42081699e+10
   1.78908166e+10  1.74929478e+10]
 [ 5.25408000e+01  3.28790988e+10  2.74440000e+00 ...  3.88380258e+10
   6.46539204e+09  6.00900728e+09]
 [ 1.42203000e+01  5.91108572e+10  2.03830000e+00 ...  2.02066110e+11
   4.50987171e+10  4.13284212e+10]] (2318, 7)

 过滤低方差特征

from scipy.stats import pearsonr
def variance_demo():
    """Filter ``factor_returns.csv`` by variance, then print a correlation.

    Loads the CSV with pandas, keeps the columns from position 1 up to (but
    not including) position -2, fits ``VarianceThreshold(threshold=10)`` on
    them and prints the transformed array with its shape. It then prints the
    result of ``pearsonr`` for the ``pe_ratio`` and ``pb_ratio`` columns of
    the sliced (pre-filter) frame.
    """
    frame = pd.read_csv("factor_returns.csv")
    # Positional slice: column 1 is included, column -2 is excluded.
    features = frame.iloc[:, 1:-2]
    selector = VarianceThreshold(threshold=10)
    selected = selector.fit_transform(features)
    # ``shape`` is an attribute, so it is read without parentheses.
    print(selected, selected.shape)

    # Correlation is computed on the sliced frame, not on the filtered array.
    pe_column = features["pe_ratio"]
    pb_column = features["pb_ratio"]
    correlation = pearsonr(pe_column, pb_column)
    print("相关系数", correlation)


if __name__ == "__main__":
    variance_demo()
相关系数 (-0.004389322779936271, 0.8327205496564927)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值