sklearn分类任务代码整理

本博客总结了一些sklearn做分类任务的代码:

  • x:特征矩阵
  • y:标签,即每个样本所属的类别

切分训练集与测试集

import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the data into a training set (70%) and a held-out test set (30%).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training data only and reuse those statistics for the
# test data. Refitting on the test set would scale the two sets with different
# means/variances and leak test-set information into preprocessing.
stdsc = StandardScaler()
x_train_std = stdsc.fit_transform(x_train)
x_test_std = stdsc.transform(x_test)

# Random-permutation splitter passed as cv= to the grid searches below.
cv = ShuffleSplit(n_splits=20, test_size=0.3)

# K-fold splitter passed as cv=kfold to the cross_val_score examples below.
# NOTE(review): the original never defined `kfold`; 10 shuffled folds is an
# assumed setting -- adjust n_splits as needed.
kfold = KFold(n_splits=10, shuffle=True, random_state=0)

分类任务交叉验证

KNN

from sklearn.neighbors import KNeighborsClassifier
 
# Cross-validate a k-nearest-neighbours classifier (default settings) on the
# standardised training data, scoring each fold by accuracy.
knn_model = KNeighborsClassifier()
cv_cross = cross_val_score(
    estimator=knn_model,
    X=x_train_std,
    y=y_train,
    scoring="accuracy",
    cv=kfold,
)
# Printed in order: mean score, standard deviation, then the per-fold scores.
for summary in (cv_cross.mean(), cv_cross.std(), cv_cross):
    print(summary)

SVM

from sklearn import svm

# Cross-validate an RBF-kernel support vector classifier with fixed C and gamma.
svm_settings = {'kernel': 'rbf', 'C': 1, 'gamma': 0.1}
svm_model = svm.SVC(**svm_settings)
cv_cross = cross_val_score(
    svm_model, x_train_std, y_train, scoring="accuracy", cv=kfold
)
fold_mean = cv_cross.mean()  # mean of the cross-validation scores
fold_std = cv_cross.std()    # standard deviation of the cross-validation scores
print(fold_mean)
print(fold_std)
print(cv_cross)

逻辑回归

from sklearn.linear_model import LogisticRegression
 
# Cross-validate a logistic-regression classifier (default settings),
# scoring each fold by accuracy.
log_model = LogisticRegression()
cv_cross = cross_val_score(
    log_model,
    x_train_std,
    y_train,
    cv=kfold,
    scoring="accuracy",
)
# Mean score, standard deviation, then the raw per-fold scores.
print(cv_cross.mean())
print(cv_cross.std())
print(cv_cross)

分类任务网格搜索

逻辑回归

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV 

# Grid-search the regularisation penalty for logistic regression.
# The solver is set explicitly: the default solver ("lbfgs") does not support
# the "l1" penalty, so the 'l1' candidates would fail to fit. "saga" supports
# both 'l1' and 'l2'. max_iter is raised above the default to give saga more
# room to converge.
log_model = LogisticRegression(solver="saga", max_iter=1000)
log_param = {
    "penalty": ['l1', 'l2'],
}
log_grid = GridSearchCV(log_model, log_param, cv=cv)
log_grid.fit(x_train_std, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print('Parameter with best score:')
print(log_grid.best_params_)
print('Cross validation score :', log_grid.best_score_)

决策树

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
# Grid-search max_features and max_depth for a decision-tree classifier.
dt_model = DecisionTreeClassifier()

# NOTE(review): `df` is not defined anywhere in this snippet -- presumably the
# DataFrame that x and y were built from; confirm before running.
n_samples = df.shape[0]

dt_param = {
    'max_features': [None, 'sqrt', 'log2'],
    # max_depth must be an integer (or None), so the sqrt / log2 depth
    # heuristics are truncated to int instead of being passed as floats.
    'max_depth': [None, int(np.sqrt(n_samples)), int(np.log2(n_samples))],
}
dt_grid = GridSearchCV(dt_model, dt_param, cv=cv)
dt_grid.fit(x_train_std, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print('Parameter with best score:')
print(dt_grid.best_params_)
print('Cross validation score :', dt_grid.best_score_)

随机森林

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
# Grid-search the number of trees for a random-forest classifier.
rf_model = RandomForestClassifier()

# Candidate forest sizes: 25, 50, ..., 175.
# (For a quick run, range(25, 50, 25) tries only 25 trees.)
rf_param = dict(n_estimators=range(25, 200, 25))

rf_grid = GridSearchCV(estimator=rf_model, param_grid=rf_param, cv=cv)
rf_grid.fit(x_train_std, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print('Parameter with best score:')
print(rf_grid.best_params_)
print('Cross validation score :', rf_grid.best_score_)

支持向量机

from sklearn import svm

# Grid-search the polynomial degree for a polynomial-kernel SVC.
svm_model = svm.SVC()
svm_param = dict(
    kernel=['poly'],  # polynomial kernel
    degree=[2, 3],
)
svm_grid = GridSearchCV(estimator=svm_model, param_grid=svm_param, cv=cv)
svm_grid.fit(x_train_std, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print('Parameter with best score:')
print(svm_grid.best_params_)
print('Cross validation score :', svm_grid.best_score_)

或者使用高斯核:

# Grid-search gamma for an RBF (Gaussian) kernel SVC.
# Alternative fixed model: svm.SVC(kernel='linear', C=0.1, gamma=0.1)
svm_model = svm.SVC()

# Logarithmic gamma grid from 1e-4 to 1e6. These are the same values as the
# "10e-5 ... 10e+5" spelling, since 10e-5 == 1e-4.
gamma_grid = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 10, 1e2, 1e3, 1e4, 1e5, 1e6]
svm_param = dict(
    kernel=['rbf'],  # Gaussian kernel
    gamma=gamma_grid,
)
svm_grid = GridSearchCV(estimator=svm_model, param_grid=svm_param, cv=cv)
svm_grid.fit(x_train_std, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print('Parameter with best score:')
print(svm_grid.best_params_)
print('Cross validation score :', svm_grid.best_score_)

多层感知机

from sklearn.neural_network import MLPClassifier

# Grid-search hidden-layer layout and the alpha parameter for an MLP classifier.
mul_model = MLPClassifier()

# Progressively deeper layouts, each layer doubling the previous width.
layer_layouts = [
    (32, 64),
    (32, 64, 128),
    (32, 64, 128, 256),
    (32, 64, 128, 256, 512),
]
mul_param = dict(
    hidden_layer_sizes=layer_layouts,
    alpha=np.arange(0.1, 0.5, 0.05),
)
mul_grid = GridSearchCV(estimator=mul_model, param_grid=mul_param, cv=cv)
mul_grid.fit(x_train_std, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print('Parameter with best score:')
print(mul_grid.best_params_)
print('Cross validation score :', mul_grid.best_score_)
  • 3
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

呆萌的代Ma

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值