# python svm超参数_grid search 超参数寻优

http://scikit-learn.org/stable/modules/grid_search.html

1. 超参数寻优方法 GridSearchCV 和 RandomizedSearchCV

2. 参数寻优的技巧进阶

2.1. Specifying an objective metric

By default, parameter search uses the score function of the estimator to evaluate a parameter setting. These are sklearn.metrics.accuracy_score for classification and sklearn.metrics.r2_score for regression.

2.2 Specifying multiple metrics for evaluation

Multimetric scoring can either be specified as a list of strings of predefined scores names or a dict mapping the scorer name to the scorer function and/or the predefined scorer name(s).

http://scikit-learn.org/stable/modules/model_evaluation.html#multimetric-scoring

2.3 Composite estimators and parameter spaces：Pipeline 方法

http://scikit-learn.org/stable/modules/pipeline.html#pipeline

>>> from sklearn.pipeline import Pipeline

>>> from sklearn.svm import SVC

>>> from sklearn.decomposition import PCA

>>> estimators = [('reduce_dim', PCA()), ('clf', SVC())]

>>> pipe = Pipeline(estimators)

>>> pipe # check pipe

Pipeline(memory=None,

steps=[('reduce_dim', PCA(copy=True,...)),

('clf', SVC(C=1.0,...))])

>>> from sklearn.pipeline import make_pipeline

>>> from sklearn.naive_bayes import MultinomialNB

>>> from sklearn.preprocessing import Binarizer

>>> make_pipeline(Binarizer(), MultinomialNB())

Pipeline(memory=None,

steps=[('binarizer', Binarizer(copy=True, threshold=0.0)),

('multinomialnb', MultinomialNB(alpha=1.0,

class_prior=None,

fit_prior=True))])

>>> pipe.set_params(clf__C=10) # 给clf 设定参数

>>> from sklearn.model_selection import GridSearchCV

>>> param_grid = dict(reduce_dim__n_components=[2, 5, 10],

... clf__C=[0.1, 10, 100])

>>> grid_search = GridSearchCV(pipe, param_grid=param_grid)

#!/usr/bin/env python2

# -*- coding: utf-8 -*-

"""

Created on Tue Sep 5 10:22:07 2017

@author: xinpingbao

"""

import numpy as np

from sklearn import datasets

from sklearn.linear_model import Ridge

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import make_scorer

# Load a regression dataset to tune on.
# NOTE(review): the original script read `dataset` without ever defining it
# (NameError on the first use); the scikit-learn diabetes set is assumed
# here -- swap in the intended data if it differs.
dataset = datasets.load_diabetes()
X = dataset.data
y = dataset.target

# prepare a range of alpha values to test
alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])

# create and fit a ridge regression model, testing each alpha
model = Ridge()
# With no `scoring` argument the search falls back to the estimator's own
# score function, which for regression is R^2 (sklearn.metrics.r2_score).
grid = GridSearchCV(estimator=model, param_grid=dict(alpha=alphas))
# To rank candidates by mean squared error instead, pass
# scoring='neg_mean_squared_error'; 'metrics.mean_squared_error' is not a
# valid predefined scorer name.
grid.fit(X, y)
print(grid)

# summarize the results of the grid search
print(grid.best_score_)
print(grid.best_estimator_.alpha)

# ---------------------- Custom error-score functions ----------------------

# Fresh estimator and candidate regularisation strengths for the second
# search, which is scored with a user-defined metric.
model = Ridge()
alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
param_grid1 = {"alpha": alphas}

def my_mse_error(real, pred, *, w_high=1.0, w_low=1.0):
    """Return the (optionally asymmetric) weighted mean squared error.

    Each squared residual ``(real - pred) ** 2`` is multiplied by ``w_high``
    when the prediction is above the true value (``real - pred < 0``) and by
    ``w_low`` otherwise. The weighted sum is divided by ``len(real)``. With
    the default weights of 1.0 this is the ordinary mean squared error.

    Args:
        real: True target values (array-like).
        pred: Predicted target values, same shape as ``real``.
        w_high: Weight applied to over-predictions.
        w_low: Weight applied to exact and under-predictions.

    Returns:
        The weighted mean squared error as a float.
    """
    # Coerce so that plain Python sequences are accepted as well as arrays.
    real = np.asarray(real, dtype=float)
    pred = np.asarray(pred, dtype=float)
    residual = real - pred
    weight = np.where(residual < 0.0, w_high, w_low)
    return np.sum(residual ** 2 * weight) / float(len(real))

def my_r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of a prediction.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of
    squared residuals and ``SS_tot`` is the sum of squared deviations of
    ``y_true`` from its mean, both taken along axis 0.

    Args:
        y_true: True target values (array-like).
        y_pred: Predicted target values, same shape as ``y_true``.

    Returns:
        The R^2 value; 1.0 for a perfect fit, 0.0 when predicting the mean.
        No special handling is done for a constant ``y_true`` (zero total
        sum of squares), so the division yields nan/inf in that case.
    """
    # Coerce so that plain Python sequences are accepted as well as arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    residual_ss = np.sum((y_true - y_pred) ** 2, axis=0)
    total_ss = np.sum((y_true - np.average(y_true, axis=0)) ** 2, axis=0)
    return 1 - (residual_ss / total_ss)

# Wrap the custom metrics as scorer objects that GridSearchCV can consume.
# The weighted MSE is a loss, so it is registered with
# greater_is_better=False (lower is better).
error_score1 = make_scorer(my_mse_error, greater_is_better=False)
# R^2 is a score: greater is better.
error_score2 = make_scorer(my_r2_score, greater_is_better=True)

# Search the alpha grid using the custom R^2 scorer. A predefined scorer
# name such as 'neg_mean_absolute_error' could be passed as `scoring`
# instead.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid1,
    scoring=error_score2,
    n_jobs=-1,
)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_,
                             grid_result.best_params_))

