调参的工具
网格搜索
导入需要的库
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.datasets import load_iris
# Load the iris dataset that the grid search below is fitted on.
iris = datasets.load_iris()

# Candidate hyper-parameters: each kernel is paired with each value of C.
parrameters = dict(
    kernel=('linear', 'rbf'),
    C=[1, 10],
)

# Base estimator handed to the search, constructed with probability=True.
svc = svm.SVC(probability=True)

# Search over the grid above; all other GridSearchCV options keep their defaults.
clf = GridSearchCV(estimator=svc, param_grid=parrameters)
clf.fit(iris.data, iris.target)
GridSearchCV(cv=None, error_score=nan,
estimator=SVC(C=1.0, break_ties=False, cache_size=200,
class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='scale', kernel='rbf', max_iter=-1,
probability=True, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='deprecated', n_jobs=None,
param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)
# Dump the raw cv_results_ dictionary recorded by the fitted search.
print(clf.cv_results_)
{'mean_fit_time': array([0.00120158, 0.00120668, 0.00139761, 0.00099792]), 'std_fit_time': array([3.76263297e-04, 3.77709886e-04, 1.01544355e-03, 2.90693166e-05]), 'mean_score_time': array([0. , 0.00039043, 0.0001986 , 0. ]), 'std_score_time': array([0. , 0.00047848, 0.00039721, 0. ]), 'param_C': masked_array(data=[1, 1, 10, 10],
mask=[False, False, False, False],
fill_value='?',
dtype=object), 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf'],
mask=[False, False, False, False],
fill_value='?',
dtype=object), 'params': [{'C': 1, 'kernel': 'linear'}, {'C': 1, 'kernel': 'rbf'}, {'C': 10, 'kernel': 'linear'}, {'C': 10, 'kernel': 'rbf'}], 'split0_test_score': array([0.96666667, 0.96666667, 1. , 0.96666667]), 'split1_test_score': array([1. , 0.96666667, 1. , 1. ]), 'split2_test_score': array([0.96666667, 0.96666667, 0.9 , 0.96666667]), 'split3_test_score': array([0.96666667, 0.93333333, 0.96666667, 0.96666667]), 'split4_test_score': array([1., 1., 1., 1.]), 'mean_test_score': array([0.98 , 0.96666667, 0.97333333, 0.98 ]), 'std_test_score': array([0.01632993, 0.02108185, 0.03887301, 0.01632993]), 'rank_test_score': array([1, 4, 3, 1])}
# Show the parameter settings (and recorded scores) of every fitted candidate as a table.
pd.DataFrame(clf.cv_results_)
| | mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_C | param_kernel | params | split0_test_score | split1_test_score | split2_test_score | split3_test_score | split4_test_score | mean_test_score | std_test_score | rank_test_score |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.001202 | 0.000376 | 0.000000 | 0.000000 | 1 | linear | {'C': 1, 'kernel': 'linear'} | 0.966667 | 1.000000 | 0.966667 | 0.966667 | 1.0 | 0.980000 | 0.016330 | 1 |
1 | 0.001207 | 0.000378 | 0.000390 | 0.000478 | 1 | rbf | {'C': 1, 'kernel': 'rbf'} | 0.966667 | 0.966667 | 0.966667 | 0.933333 | 1.0 | 0.966667 | 0.021082 | 4 |
2 | 0.001398 | 0.001015 | 0.000199 | 0.000397 | 10 | linear | {'C': 10, 'kernel': 'linear'} | 1.000000 | 1.000000 | 0.900000 | 0.966667 | 1.0 | 0.973333 | 0.038873 | 3 |
3 | 0.000998 | 0.000029 | 0.000000 | 0.000000 | 10 | rbf | {'C': 10, 'kernel': 'rbf'} | 0.966667 | 1.000000 | 0.966667 | 0.966667 | 1.0 | 0.980000 | 0.016330 | 1 |
最优模型结果
clf.best_params_#parameter combination of the best model
{'C': 1, 'kernel': 'linear'}
计算样本的决策函数值(各类别的决策得分,而非直接预测的类别)
# NOTE: this prints decision_function scores (the recorded output shows three
# score columns per sample), not predicted class labels.
print(clf.decision_function(iris.data))#decision-function scores for every sample
[[ 2.24627744 1.2980152 -0.30616012]
[ 2.23781119 1.29663601 -0.30453043]
[ 2.24548583 1.2968967 -0.30542241]
…
随机搜索
import scipy.stats as stats
from sklearn import datasets
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
from sklearn.svm import SVC
# Load the iris dataset used to fit the randomized search.
iris = datasets.load_iris()

# Search space: kernel and class_weight are picked from the listed options,
# while C and gamma are drawn from exponential distributions
# (scale=100 and scale=0.1 respectively).
parrameters = {
    'kernel': ('linear', 'rbf'),
    'C': stats.expon(scale=100),
    'gamma': stats.expon(scale=.1),
    'class_weight': ('balanced', None),
}

svc = SVC()

# A fixed random_state makes the sampled candidates -- and therefore
# cv_results_ and best_params_ -- repeatable from run to run; without it
# every execution draws different C/gamma values.
clf = RandomizedSearchCV(svc, parrameters, random_state=0)
clf.fit(iris.data, iris.target)

# Tabulate every sampled candidate, then show the winning combination.
pd.DataFrame(clf.cv_results_)
clf.best_params_
{'C': 333.0779879298101,
'class_weight': None,
'gamma': 0.004641512813065941,
'kernel': 'rbf'}