# -*- coding: utf-8 -*-
"""
Created on Tue Aug 09 22:38:37 2016

@author: Administrator

Tune a RandomForestClassifier on the scikit-learn digits dataset twice, once
with RandomizedSearchCV and once with GridSearchCV, and print the wall-clock
time and the per-candidate cross-validation scores of each search.
"""

import time

import numpy as np
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
# The search classes live in sklearn.model_selection; the old
# sklearn.grid_search module was deprecated in 0.18 and removed in 0.20.
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV


def _format_cv_scores(cv_results):
    """Render per-candidate scores in the layout of the old ``grid_scores_``.

    Args:
        cv_results: Mapping shaped like a fitted search's ``cv_results_``;
            only the ``mean_test_score``, ``std_test_score`` and ``params``
            entries are read.

    Returns:
        A single string ``[mean: ..., std: ..., params: {...}, ...]`` with one
        entry per candidate parameter setting.
    """
    entries = [
        "mean: %.5f, std: %.5f, params: %s" % (mean, std, params)
        for mean, std, params in zip(
            cv_results["mean_test_score"],
            cv_results["std_test_score"],
            cv_results["params"],
        )
    ]
    return "[%s]" % ", ".join(entries)


# Load the data
digits = load_digits()
X, y = digits.data, digits.target

# Base classifier whose hyper-parameters are tuned by both searches
meta_clf = RandomForestClassifier(n_estimators=20)

# =================================================================
# Candidate values sampled by the randomized search.
# min_samples_split starts at 2: current scikit-learn rejects an integer
# value of 1, and 2 is the smallest valid setting.
param_dist = {
    "max_depth": [3, None],
    "max_features": [1, 5, 7, 11],
    "min_samples_split": [2, 5, 7, 11],
    "min_samples_leaf": [1, 5, 7, 11],
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}

# Run the randomized search
n_iter_search = 20
rs_clf = RandomizedSearchCV(
    meta_clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
)

start = time.time()
rs_clf.fit(X, y)
print("RandomizedSearchCV took %.2f seconds for %d candidates parameter settings."
      % ((time.time() - start), n_iter_search))
# grid_scores_ no longer exists; cv_results_ carries the same information.
print(_format_cv_scores(rs_clf.cv_results_))

# =================================================================
# Full grid evaluated exhaustively by the grid search.
# min_samples_split starts at 2 for the same reason as above.
param_grid = {
    "max_depth": [3, None],
    "max_features": [1, 3, 10],
    "min_samples_split": [2, 3, 10],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}

# Run the grid search
gs_clf = GridSearchCV(meta_clf, param_grid=param_grid)

start = time.time()
gs_clf.fit(X, y)
# One entry in cv_results_["params"] per evaluated candidate.
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time.time() - start, len(gs_clf.cv_results_["params"])))
print(_format_cv_scores(gs_clf.cv_results_))
RandomizedSearchCV took 8.64 seconds for 20 candidates parameter settings.
[mean: 0.78075, std: 0.00987, params: {'bootstrap': True, 'min_samples_leaf': 11, 'max_features': 11, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 3},mean: 0.90874, std: 0.01377, params: {'bootstrap': False, 'min_samples_leaf': 7, 'max_features': 11, 'criterion': 'entropy', 'min_samples_split': 1, 'max_depth': None}, mean: 0.90929, std: 0.01682, params: {'bootstrap': True, 'min_samples_leaf': 5, 'max_features': 11, 'criterion': 'gini', 'min_samples_split': 1, 'max_depth': None}, mean: 0.91987, std: 0.01263, params: {'bootstrap': False, 'min_samples_leaf': 5, 'max_features': 5, 'criterion': 'gini', 'min_samples_split': 1, 'max_depth': None}, mean: 0.79911, std: 0.00789, params: {'bootstrap': False, 'min_samples_leaf': 11, 'max_features': 5, 'criterion': 'entropy', 'min_samples_split': 11, 'max_depth': 3}, mean: 0.91764, std: 0.01220, params: {'bootstrap': True, 'min_samples_leaf': 1, 'max_features': 11, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': None}, mean: 0.90874, std: 0.01730, params: {'bootstrap': False, 'min_samples_leaf': 11, 'max_features': 7, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': None}, mean: 0.76628, std: 0.01378, params: {'bootstrap': True, 'min_samples_leaf': 1, 'max_features': 1, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 3}, mean: 0.79076, std: 0.01176, params: {'bootstrap': False, 'min_samples_leaf': 5, 'max_features': 7, 'criterion': 'gini', 'min_samples_split': 7, 'max_depth': 3}, mean: 0.91152, std: 0.01354, params: {'bootstrap': False, 'min_samples_leaf': 1, 'max_features': 1, 'criterion': 'entropy', 'min_samples_split': 7, 'max_depth': None}, mean: 0.78854, std: 0.02561, params: {'bootstrap': True, 'min_samples_leaf': 5, 'max_features': 5, 'criterion': 'entropy', 'min_samples_split': 1, 'max_depth': 3}, mean: 0.80078, std: 0.00885, params: {'bootstrap': False, 'min_samples_leaf': 1, 'max_features': 5, 'criterion': 'gini', 'min_samples_split': 5, 'max_depth': 3}, mean: 0.92098, std: 
0.00853, params: {'bootstrap': False, 'min_samples_leaf': 5, 'max_features': 7, 'criterion': 'gini', 'min_samples_split': 5, 'max_depth&