import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
# Load the training table and keep only the columns used as model inputs.
train = pd.read_csv("xxx.csv")
usedcols = ['xxx', 'xxx', 'xxx']
df = pd.DataFrame(data=train, columns=usedcols)

# One-hot encode the selected columns so the forest receives numeric inputs.
df = pd.get_dummies(df)

# Fit a depth-limited random forest with a fixed seed; its importances are
# read further down in the script.
# NOTE(review): confirm the target column is not also listed in `usedcols`.
model = RandomForestRegressor(random_state=1, max_depth=10)
model.fit(df, train.xxx)
# Plot the feature importance graph: one horizontal bar per feature, limited
# to the `top_n` most important encoded columns.
top_n = 10  # number of features shown in the chart
features = df.columns
importances = model.feature_importances_
# np.argsort sorts ascending, so the last `top_n` positions hold the largest
# importances. The previous `[-9:]` slice kept only nine features although
# ten were intended; slicing also copes with fewer than `top_n` columns.
indices = np.argsort(importances)[-top_n:]  # top 10 features
positions = range(len(indices))
plt.title('Feature Importances')
plt.barh(positions, importances[indices], color='b', align='center')
plt.yticks(positions, [features[i] for i in indices])
plt.xlabel('Relative Importance')
plt.show()
# Wrap the forest in an importance-based selector and reduce `df` to the
# columns the selector keeps (the selector's default threshold is used).
# NOTE(review): with the default prefit=False the selector presumably fits
# the estimator again here -- confirm against the scikit-learn docs.
feature = SelectFromModel(estimator=model)
Fit = feature.fit_transform(df, y=train.xxx)