# 开发环境：Jupyter Notebook
from sklearn import model_selection
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

# GridSearchCV lives in sklearn.model_selection; the sklearn.grid_search
# module was removed in scikit-learn 0.20 and is kept only as a fallback
# for legacy installations.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV
# Load the iris data set and keep the feature matrix and the class labels
# under short names.
iris = load_iris()
x, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed makes the split
# identical from run to run.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=123456,
)
# Baseline: a random forest with default hyper-parameters, trained on the
# training split and scored (mean accuracy) on the held-out split.
rf = RandomForestClassifier()
rf.fit(X_train, y_train)

# Print explicitly: a bare expression is only displayed when it is the last
# statement of a notebook cell and is silently discarded otherwise.
print("baseline test accuracy:", rf.score(X_test, y_test))

# Show every feature name next to its importance instead of two separate
# sequences that have to be matched up by eye.
for feature_name, importance in zip(
    iris.feature_names, rf.feature_importances_
):
    print(feature_name, importance)
# Hyper-parameter grid explored exhaustively by the search below.
parameters = {
    'n_estimators': [5, 10, 15, 20, 30],
    'max_features': [1, 2, 3, 4],
    'criterion': ['gini', 'entropy'],
}

# Fresh, unfitted estimator used as the template for the search.
rf = RandomForestClassifier()
grid_search = GridSearchCV(rf, parameters, scoring='accuracy', cv=5)

# Search on the training split only. Fitting on the full data set would let
# the rows later used as the test set influence the choice of
# hyper-parameters, which makes the final test score optimistic.
grid_search.fit(X_train, y_train)

# Print explicitly so the results are visible even when these statements are
# not the last line of a notebook cell.
print("best estimator:", grid_search.best_estimator_)
print("best cross-validated accuracy:", grid_search.best_score_)
print("best parameters:", grid_search.best_params_)
# Final model with fixed hyper-parameters.
# NOTE(review): these values were presumably copied from an earlier
# grid_search.best_params_ result -- confirm they still match the search.
rf2 = RandomForestClassifier(
    criterion='gini',
    max_features=2,
    n_estimators=10,
)
rf2.fit(X_train, y_train)

# Print explicitly: a bare expression is discarded unless it is the last
# statement of a notebook cell.
print("tuned test accuracy:", rf2.score(X_test, y_test))