1. 利用决策树进行排序
步骤
- 导入数据
- 建模
- 训练
- 获取变量重要性得分
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the wine dataset.
data = load_wine()
# data.data.shape -> (178, 13)   feature matrix
# data.target                    class labels

X_train, X_test, Y_train, Y_test = train_test_split(
    data.data, data.target, random_state=33
)

# Fit a decision tree and read back its per-feature importance scores.
model = DecisionTreeClassifier(random_state=2)
model.fit(X_train, Y_train)
features_importance = model.feature_importances_

# argsort is ascending, so reverse it to get feature indices ordered from
# the highest importance score to the lowest.
indices = np.argsort(features_importance)[::-1]
# Keep the column indices of the three highest-scoring features.
importance = indices[:3]
print(X_train[:, importance])
2. 利用随机森林进行变量选择
from sklearn.ensemble import RandomForestClassifier as RF
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the wine dataset.
data = load_wine()
# data.data.shape -> (178, 13)   feature matrix
# data.target                    class labels
X_train, X_test, Y_train, Y_test = train_test_split