# Summary of common sklearn models and grid search (1) --- code

# Shared imports and a reusable evaluation helper for the model snippets below.
from sklearn.model_selection import train_test_split
from sklearn import metrics
import warnings

# NOTE(review): blanket suppression hides sklearn convergence/deprecation
# warnings — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")


def evaluate_model(model, x_test, y_test):
    """Predict on *x_test* with a fitted *model*, print and return accuracy.

    BUG FIX: the original executed ``model.predict_proba(...)`` and
    ``metrics.accuracy_score(y_test, pre)`` at module level before any of
    ``model``/``pre``/``y_test`` existed, raising NameError on import.
    """
    pre = model.predict(x_test)
    acc = metrics.accuracy_score(y_test, pre)
    print(acc)
    return acc

# KNN

# K-nearest-neighbours classifier with default hyper-parameters (k=5).
from sklearn.neighbors import KNeighborsClassifier

# fit() returns the estimator itself, so construction and fitting chain cleanly.
model = KNeighborsClassifier().fit(train_x, train_y)


# Multinomial naive Bayes with light Lidstone smoothing (alpha=0.01).
from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB(alpha=0.01).fit(train_x, train_y)

# Logistic regression with L2 (ridge) regularisation.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(penalty='l2').fit(train_x, train_y)

# GBDT

# GBDT -- gradient-boosted decision trees.
from sklearn.ensemble import GradientBoostingClassifier

# BUG FIX: the original imported GradientBoostingClassifier but never
# instantiated it, so model.fit() silently re-fitted whichever estimator
# was assigned last instead of a GBDT. Construct the classifier first.
model = GradientBoostingClassifier()
model.fit(train_x, train_y)

# Random forest with a small ensemble of 8 trees.
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=8).fit(train_x, train_y)

# Support-vector classifier with an RBF kernel; probability=True enables
# predict_proba (adds internal cross-validated calibration at fit time).
from sklearn.svm import SVC

model = SVC(kernel='rbf', probability=True).fit(train_x, train_y)

# XGBOOST

import xgboost as xgb
from xgboost.sklearn import XGBClassifier

# 1. XGBoost model training

# Train an XGBoost classifier with hand-tuned hyper-parameters and report
# accuracy on the held-out test split.
xgb1 = XGBClassifier(
    learning_rate=0.05,       # low learning rate paired with many rounds
    n_estimators=2800,
    max_depth=5,
    min_child_weight=1,
    gamma=0.21,               # minimum loss reduction required to split a node
    subsample=0.8,            # row subsampling per boosting round
    colsample_bytree=0.75,    # column subsampling per tree
    objective='binary:logistic',
    scale_pos_weight=1,
    seed=27,
)
print("fitting")  # BUG FIX: original printed the typo "fiting"
xgb1.fit(X_train, y_train)
pre = xgb1.predict(X_test)
print(pre)
print(y_test)
acc = metrics.accuracy_score(y_test, pre)
print(acc)

# 2. XGBoost hyper-parameter search

# Hyper-parameter grids for the staged XGBoost search, tuned one group at a
# time (tree structure -> gamma -> subsampling -> regularisation -> seed/lr).

# Stage 1: coarse sweep of tree structure.
param_test1 = {
    'max_depth': range(3, 7, 2),
    'min_child_weight': range(1, 6, 2),
}
# Stage 2: refine around the stage-1 optimum.
param_test2 = {
    'max_depth': [4, 5, 6],
    'min_child_weight': [1, 2, 3],
}
# Stage 2b: probe larger min_child_weight values.
param_test2b = {
    'min_child_weight': [6, 8, 10, 12],
}
# Stage 3: coarse gamma sweep.
param_test3 = {
    'gamma': [0.0, 0.1, 0.2, 0.3, 0.4],
}
# Stage 3b: fine-grained gamma sweep around 0.2.
param_test3b = {
    'gamma': [0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25],
}
# Stage 4: coarse row/column subsampling grid (0.6-0.9 in steps of 0.1).
param_test4 = {
    'subsample': [0.6, 0.7, 0.8, 0.9],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9],
}
# Stage 5: finer subsampling grid (0.75-0.85 in steps of 0.05).
param_test5 = {
    'subsample': [0.75, 0.80, 0.85],
    'colsample_bytree': [0.75, 0.80, 0.85],
}
# Stage 6: coarse L1 regularisation sweep.
param_test6 = {
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100],
}
# Stage 7: fine L1 regularisation sweep near zero.
param_test7 = {
    'reg_alpha': [0, 0.001, 0.005, 0.01, 0.05],
}
# Stage 8: random-seed sensitivity check.
param_test8 = {
    'seed': [24, 25, 26, 27, 28],
}
# Stage 9: final learning-rate sweep.
param_test9 = {
    'learning_rate': [0.04, 0.05, 0.06],
}
# Grid-search a single hyper-parameter (learning_rate) around the previously
# tuned XGBoost configuration, with 5-fold cross-validation.
# BUG FIX: GridSearchCV was used here before its import further down the
# file (NameError); import it locally so this snippet runs on its own.
from sklearn.model_selection import GridSearchCV

gsearch1 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.06,
        n_estimators=2500,
        max_depth=6,
        min_child_weight=1,
        gamma=0.2,
        subsample=0.8,
        colsample_bytree=0.75,
        objective='binary:logistic',
        scale_pos_weight=1,
        seed=25,
    ),
    param_grid=param_test9,  # learning-rate grid defined above
    cv=5,
    verbose=5,
)
gsearch1.fit(X_train, y_train)
print(gsearch1.best_params_, gsearch1.best_score_)

# SVM with an RBF kernel, tuned by grid search over C and gamma, then
# refitted on the full training split with the best parameters.
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

model = SVC(kernel='rbf', probability=True)
param_grid = {'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000], 'gamma': [0.001, 0.0001]}
grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
grid_search.fit(train_x, train_y)

best_parameters = grid_search.best_estimator_.get_params()
# BUG FIX: in the original the print below was not indented under the for
# loop, which is an IndentationError. Also dropped the redundant list()
# around .items() -- dict views iterate directly.
for para, val in best_parameters.items():
    print(para, val)

# Refit a fresh estimator with the best C/gamma found by the search.
model = SVC(kernel='rbf', C=best_parameters['C'], gamma=best_parameters['gamma'], probability=True)
model.fit(train_x, train_y)