# Core code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV
# 省略了训练集,测试集的划分 x_train,y_train,x_test,y_test
# RandomForestClassifier has no `coef_` attribute; the subclass below adds one
# so that the classifier can be used with RFECV.
class RandomForestClassifierWithCoef(RandomForestClassifier):
    """Random forest classifier that additionally exposes ``coef_``.

    ``RandomForestClassifier`` provides ``feature_importances_`` but no
    ``coef_``. This subclass copies the importances into ``coef_`` after
    fitting so that feature selectors which read ``coef_`` (such as RFECV
    in scikit-learn releases that lack ``importance_getter``) can rank the
    features.
    """

    def fit(self, *args, **kwargs):
        """Fit the forest and mirror ``feature_importances_`` into ``coef_``.

        All positional and keyword arguments are forwarded unchanged to
        ``RandomForestClassifier.fit``.

        Returns:
            The fitted estimator itself, following the scikit-learn
            convention, so calls such as ``model.fit(X, y).predict(X)``
            can be chained.
        """
        super(RandomForestClassifierWithCoef, self).fit(*args, **kwargs)
        # Expose the importances under the attribute name RFECV looks for.
        self.coef_ = self.feature_importances_
        return self
# Base estimator whose importances drive the elimination: 500 trees, at
# least 5 samples per leaf, n_jobs=-1 to parallelise across available cores.
rf = RandomForestClassifierWithCoef(
    n_estimators=500,
    min_samples_leaf=5,
    n_jobs=-1,
)

# Recursive feature elimination with cross-validation: remove one feature
# per step and score each candidate subset by accuracy over 2 folds.
rfecv = RFECV(
    estimator=rf,
    step=1,
    scoring='accuracy',
    cv=2,
)
selector = rfecv.fit(x_train, y_train)

# Report how many features RFECV decided to keep.
print('RFECV 选择出的特征个数 :', rfecv.n_features_)