特征选择之计算特征重要性（二）：树森林 (forests of trees)

最新推荐文章于 2022-10-29 15:34:56 发布

Bill_zhang5

最新推荐文章于 2022-10-29 15:34:56 发布

阅读量879

点赞数

分类专栏：特征选择、数据预处理、Python

本文链接：https://blog.csdn.net/Bill_zhang5/article/details/85675901

版权

Python 同时被 3 个专栏收录

22 篇文章 1 订阅

订阅专栏

数据预处理

13 篇文章 0 订阅

订阅专栏

特征选择

2 篇文章 0 订阅

订阅专栏

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb

def feature_importance_randomforest(input_x, input_y):
    """Fit a random forest and report its feature importances.

    Trains a ``RandomForestClassifier`` (250 trees, ``random_state=0``) on
    the given data, prints the features ranked from most to least
    important, and shows a bar chart of the importances with the standard
    deviation across the individual trees as error bars.

    Args:
        input_x: Training samples, passed unchanged to ``forest.fit``.
        input_y: Target labels, passed unchanged to ``forest.fit``.

    Returns:
        None. The results are the printed ranking and the displayed figure.
    """
    forest = RandomForestClassifier(n_estimators=250, random_state=0)
    forest.fit(input_x, input_y)

    importances = forest.feature_importances_
    # Spread of each feature's importance across the individual trees;
    # drawn as the error bars in the plot below.
    std = np.std(
        [tree.feature_importances_ for tree in forest.estimators_], axis=0
    )
    # Feature indices ordered from most to least important.
    indices = np.argsort(importances)[::-1]
    # Take the feature count from the fitted model instead of a
    # module-level ``X`` that this function is never given; this also
    # works when ``input_x`` has no ``.shape`` attribute.
    n_features = importances.shape[0]

    # Print the feature ranking.
    print("Feature ranking:")
    for rank, idx in enumerate(indices, start=1):
        print("%d. feature %d (%f)" % (rank, idx, importances[idx]))

    # Plot the feature importances of the forest.
    plt.figure(figsize=(24, 16))
    plt.title("Feature importances")
    plt.bar(range(n_features), importances[indices],
            color="r", yerr=std[indices], align="center")
    plt.xticks(range(n_features), indices)
    plt.xlim([-1, n_features])
    plt.show()
    
    
def feature_importance_lgb(train_all):
    """Train a LightGBM booster and report its feature importances.

    Trains a binary-classification GBDT on ``train_all`` for up to 450
    boosting rounds, prints the features ranked from most to least
    important (by number of splits), and shows a bar chart of those
    importances.

    Requires LightGBM >= 3.3: early stopping and periodic logging are
    configured through callbacks, because the ``early_stopping_rounds`` and
    ``verbose_eval`` keyword arguments were removed from ``lgb.train`` in
    LightGBM 4.0.

    Args:
        train_all: Training data handed to ``lgb.train`` as ``train_set``
            (expected to be an ``lgb.Dataset``). It is also used as the
            only validation set.

    Returns:
        None. The results are the printed ranking and the displayed figure.
    """
    params = {
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': {'auc'},
        'max_depth': 4,
        'min_child_weight': 6,
        'num_leaves': 16,
        'learning_rate': 0.02,
        'feature_fraction': 0.7,
        'bagging_fraction': 0.7,
        'bagging_freq': 5,
    }

    # NOTE(review): the only validation set is the training set itself, so
    # early stopping has no held-out signal to act on (LightGBM appears to
    # skip the training set when checking for early stopping -- confirm).
    # Kept as in the original; pass a real validation Dataset to make it
    # meaningful.
    booster = lgb.train(
        params,
        train_all,
        num_boost_round=450,
        valid_sets=[train_all],
        callbacks=[
            lgb.early_stopping(stopping_rounds=100),
            lgb.log_evaluation(period=100),
        ],
    )

    # ``lgb.train`` returns a Booster, which has no ``feature_importances_``
    # or ``estimators_`` attributes (those exist only on the scikit-learn
    # wrappers). Use the Booster API instead; "split" counts how often each
    # feature is used in a split.
    importances = booster.feature_importance(importance_type="split")
    # Feature indices ordered from most to least important.
    indices = np.argsort(importances)[::-1]
    # Ask the model for the feature count instead of reading a
    # module-level ``X`` that this function is never given.
    n_features = booster.num_feature()

    # Print the feature ranking.
    print("Feature ranking:")
    for rank, idx in enumerate(indices, start=1):
        print("%d. feature %d (%f)" % (rank, idx, importances[idx]))

    # Plot the feature importances. A Booster does not expose per-tree
    # importances, so no standard-deviation error bars are drawn.
    plt.figure(figsize=(24, 16))
    plt.title("Feature importances")
    plt.bar(range(n_features), importances[indices],
            color="r", align="center")
    plt.xticks(range(n_features), indices)
    plt.xlim([-1, n_features])
    plt.show()