import numpy as np
# pandas mainly provides the tabular DataFrame structure and is built on top of numpy's numerical arrays
import pandas as pd
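# scipy is also built on numpy; the difference is that it ships ready-made statistical models (e.g. the normal distribution), which makes it more direct to use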
import scipy.stats as ss
# Build a 10-row demo table: A, B and C are feature columns sampled with
# ss.norm.rvs (normal distribution); D is the label column.
columns = {name: ss.norm.rvs(size=10) for name in ("A", "B", "C")}
# randint can return `low` but never `high`, so D only holds the values 0 and 1.
columns["D"] = np.random.randint(low=0, high=2, size=10)
df = pd.DataFrame(columns)
print(df)
# Plain ndarray copy of the table contents, used for the column slicing further down.
data = df.to_numpy()
#引入SVR回归器
from sklearn.svm import SVR
#引入决策树回归器
from sklearn.tree import DecisionTreeRegressor
# Split the array column-wise: the first three columns (A, B, C) are the
# features, the fourth column (D) is the label.
X, Y = data[:, :3], data[:, 3]
# Import one selector for each feature-selection approach: filter (SelectKBest), wrapper (RFE) and embedded (SelectFromModel)
from sklearn.feature_selection import SelectKBest,RFE,SelectFromModel
# --- Filter approach ---
# Score each feature against the label and keep the k=2 best-scoring ones.
# No score_func is passed, so the selector uses its default; the repr printed
# in the run output recorded below this script shows it to be f_classif.
skb = SelectKBest(k=2)
skb.fit(X, Y)  # fit the selector on the features and label
print(skb)  # fit() hands back the selector itself, so this prints the same repr
kbest_features = skb.transform(X)  # apply the fitted selection to X
print(kbest_features)

# --- Wrapper approach (recursive feature elimination) ---
# Repeatedly fit a linear-kernel SVR and drop features until 2 remain;
# step=1 removes one feature per iteration.
linear_svr = SVR(kernel="linear")
rfe = RFE(estimator=linear_svr, n_features_to_select=2, step=1)
rfe_features = rfe.fit_transform(X, Y)
print(rfe_features)

# --- Embedded approach ---
# Fit a decision-tree regressor and drop every feature that falls below the
# 0.1 threshold. Unlike the two selectors above, the number of surviving
# features is not fixed in advance.
tree = DecisionTreeRegressor()
sfm = SelectFromModel(estimator=tree, threshold=0.1)
sfm_features = sfm.fit_transform(X, Y)
print(sfm_features)
D:\anaconda\python.exe G:/pycharm/untitled/lesson1/BigData/2.py
A B C D
0 -1.113453 -1.918568 -0.469903 1
1 0.020019 -0.051674 -0.688106 1
2 1.169084 -0.774108 -0.007191 1
3 0.209859 1.060492 -1.378050 1
4 0.942913 -0.574865 0.320743 0
5 -1.764901 -0.582284 -0.822354 0
6 1.803009 -0.411108 -1.571471 0
7 0.272782 0.144740 -0.802366 0
8 -0.183557 0.297150 -1.445777 0
9 -0.173935 -0.306051 0.577133 0
SelectKBest(k=2, score_func=<function f_classif at 0x00000224F6C1C5E8>)
[[-1.11345295 -1.91856772]
[ 0.02001948 -0.05167408]
[ 1.16908433 -0.77410847]
[ 0.2098592 1.06049168]
[ 0.94291254 -0.57486482]
[-1.76490117 -0.58228386]
[ 1.80300906 -0.4111079 ]
[ 0.2727825 0.1447399 ]
[-0.18355694 0.29714988]
[-0.173935 -0.30605105]]
[[-1.11345295 -1.91856772]
[ 0.02001948 -0.05167408]
[ 1.16908433 -0.77410847]
[ 0.2098592 1.06049168]
[ 0.94291254 -0.57486482]
[-1.76490117 -0.58228386]
[ 1.80300906 -0.4111079 ]
[ 0.2727825 0.1447399 ]
[-0.18355694 0.29714988]
[-0.173935 -0.30605105]]
[[-1.91856772 -0.46990292]
[-0.05167408 -0.6881059 ]
[-0.77410847 -0.00719142]
[ 1.06049168 -1.37804969]
[-0.57486482 0.32074305]
[-0.58228386 -0.82235441]
[-0.4111079 -1.57147097]
[ 0.1447399 -0.80236576]
[ 0.29714988 -1.44577728]
[-0.30605105 0.57713315]]
5-05特征选择代码
最新推荐文章于 2024-10-08 08:56:16 发布