# Evaluate feature importance with a random forest (RF) and plot the feature ranking.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import classification_report
def read_data(path="./output/killed_collision_normal2class.pkl",
              test_size=0.3, random_state=0):
    """Load the pickled dataset and split it into train and test sets.

    The ``"KILLED"`` column is used as the target; every other column is
    passed on as a feature.

    Args:
        path: Location of the pickle file to load. Defaults to the path
            this script originally hard-coded. The loaded object is
            expected to be a pandas DataFrame containing a ``"KILLED"``
            column.
        test_size: Forwarded to ``train_test_split`` (default ``0.3``).
        random_state: Seed forwarded to ``train_test_split`` so the split
            is reproducible (default ``0``).

    Returns:
        A tuple ``(df, X_train, X_test, y_train, y_test)`` where ``df`` is
        the full loaded object and the remaining four items are the
        outputs of ``train_test_split``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle(path)

    features = df.drop(columns=["KILLED"])
    target = df["KILLED"]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    return df, X_train, X_test, y_train, y_test
# ---------- Load the dataset
# The full data and the train/test splits returned by read_data() are bound
# to these names; feature_importance() below reads X_train from here.
pd_data,X_train, X_test, y_train, y_test = read_data()
def feature_importance(features_num=20):
if(features_num > X_train.shape[1]):
print("the features num is too big for the trainData")
return
forest