def stand_demo():
    """Scale the first three columns of ``dating.txt`` and print the result.

    Reads ``dating.txt`` with pandas, keeps only the first three columns,
    runs them through ``StandardScaler.fit_transform`` and prints the
    transformed data. Returns ``None``.

    NOTE(review): ``StandardScaler`` is presumably scikit-learn's; its import
    is not visible in this snippet. The sample output pasted after this
    function has every shown value inside [0, 1], which looks like min-max
    scaling rather than standardization -- confirm which scaler produced it.
    """
    # Column slice :3 keeps column positions 0, 1 and 2.
    features = pd.read_csv("dating.txt").iloc[:, :3]
    scaler = StandardScaler()
    scaled = scaler.fit_transform(features)
    print(scaled)
[[0.44832535 0.39805139 0.56233353]
[0.15873259 0.34195467 0.98724416]
[0.28542943 0.06892523 0.47449629]
...
[0.29115949 0.50910294 0.51079493]
[0.52711097 0.43665451 0.4290048 ]
[0.47940793 0.3768091 0.78571804]]
def variance_demo():
    """Run ``VarianceThreshold(threshold=0)`` over ``factor_returns.csv``.

    Reads the CSV with pandas, keeps the columns from position 1 up to (but
    not including) the second-to-last one, applies the selector and prints
    the transformed data together with its shape.

    NOTE(review): ``VarianceThreshold`` is presumably scikit-learn's feature
    selector; its import is not visible in this snippet.
    """
    frame = pd.read_csv("factor_returns.csv")
    # Slice start 1 is inclusive, slice stop -2 is exclusive.
    features = frame.iloc[:, 1:-2]
    selector = VarianceThreshold(threshold=0)
    selected = selector.fit_transform(features)
    # `shape` is an attribute, so it takes no parentheses.
    print(selected, selected.shape)
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    variance_demo()
[[ 5.95720000e+00 1.18180000e+00 8.52525509e+10 ... 2.01000000e+00
2.07014010e+10 1.08825400e+10]
[ 7.02890000e+00 1.58800000e+00 8.41133582e+10 ... 3.26000000e-01
2.93083692e+10 2.37834769e+10]
[-2.62746100e+02 7.00030000e+00 5.17045520e+08 ... -6.00000000e-03
1.16798290e+07 1.20300800e+07]
...
[ 3.95523000e+01 4.00520000e+00 1.70243430e+10 ... 2.20000000e-01
1.78908166e+10 1.74929478e+10]
[ 5.25408000e+01 2.46460000e+00 3.28790988e+10 ... 1.21000000e-01
6.46539204e+09 6.00900728e+09]
[ 1.42203000e+01 1.41030000e+00 5.91108572e+10 ... 2.47000000e-01
4.50987171e+10 4.13284212e+10]] (2318, 9)
def variance_demo():
    """Run ``VarianceThreshold(threshold=10)`` over ``factor_returns.csv``.

    Reads the CSV with pandas, keeps the columns from position 1 up to (but
    not including) the second-to-last one, applies the selector with a
    threshold of 10 and prints the transformed data together with its shape.

    NOTE(review): ``VarianceThreshold`` is presumably scikit-learn's feature
    selector; its import is not visible in this snippet.
    """
    frame = pd.read_csv("factor_returns.csv")
    # Slice start 1 is inclusive, slice stop -2 is exclusive.
    features = frame.iloc[:, 1:-2]
    selector = VarianceThreshold(threshold=10)
    selected = selector.fit_transform(features)
    # `shape` is an attribute, so it takes no parentheses.
    print(selected, selected.shape)
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    variance_demo()
[[ 5.95720000e+00 8.52525509e+10 8.00800000e-01 ... 1.21144486e+12
2.07014010e+10 1.08825400e+10]
[ 7.02890000e+00 8.41133582e+10 1.64630000e+00 ... 3.00252062e+11
2.93083692e+10 2.37834769e+10]
[-2.62746100e+02 5.17045520e+08 -5.67800000e-01 ... 7.70517753e+08
1.16798290e+07 1.20300800e+07]
...
[ 3.95523000e+01 1.70243430e+10 3.34400000e+00 ... 2.42081699e+10
1.78908166e+10 1.74929478e+10]
[ 5.25408000e+01 3.28790988e+10 2.74440000e+00 ... 3.88380258e+10
6.46539204e+09 6.00900728e+09]
[ 1.42203000e+01 5.91108572e+10 2.03830000e+00 ... 2.02066110e+11
4.50987171e+10 4.13284212e+10]] (2318, 7)
过滤低方差特征
from scipy.stats import pearsonr
def variance_demo():
    """Apply a variance threshold, then print a Pearson correlation.

    Reads ``factor_returns.csv`` with pandas, keeps the columns from
    position 1 up to (but not including) the second-to-last one, runs
    ``VarianceThreshold(threshold=10)`` over them and prints the transformed
    data with its shape. It then calls ``pearsonr`` on the ``pe_ratio`` and
    ``pb_ratio`` columns of the sliced frame and prints the result.

    NOTE(review): ``VarianceThreshold`` is presumably scikit-learn's feature
    selector; its import is not visible in this snippet.
    """
    frame = pd.read_csv("factor_returns.csv")
    # Slice start 1 is inclusive, slice stop -2 is exclusive.
    features = frame.iloc[:, 1:-2]
    selector = VarianceThreshold(threshold=10)
    selected = selector.fit_transform(features)
    # `shape` is an attribute, so it takes no parentheses.
    print(selected, selected.shape)

    # Compute the correlation coefficient on the sliced (untransformed) frame.
    correlation = pearsonr(features["pe_ratio"], features["pb_ratio"])
    print("相关系数", correlation)
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    variance_demo()
相关系数 (-0.004389322779936271, 0.8327205496564927)