机器学习分类+回归

模型选择

from sklearn import datasets
from sklearn import model_selection

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.svm import NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

import numpy as np

# Instantiate one default-configured candidate from each major classifier
# family so they can be compared under identical cross-validation.
cl1 = ExtraTreesClassifier()
cl2 = RandomForestClassifier(random_state=1)
cl3 = AdaBoostClassifier()
cl4 = GradientBoostingClassifier()
cl5 = GaussianProcessClassifier()
cl6 = LogisticRegression()
cl7 = RidgeClassifier()
cl8 = SGDClassifier()
cl9 = GaussianNB()
cl10 = KNeighborsClassifier(n_neighbors=1)
cl11 = MLPClassifier()
cl12 = SVC()
cl13 = NuSVC()
cl14 = DecisionTreeClassifier()
cl15 = ExtraTreeClassifier()
cl16 = XGBClassifier()
cl17 = LGBMClassifier()

print('3-fold cross validation:\n')

# Every label in the original parallel list was exactly the estimator's
# class name, so derive it from the object instead of maintaining the list.
for clf in (cl1, cl2, cl3, cl4, cl5, cl6, cl7, cl8, cl9, cl10,
            cl11, cl12, cl13, cl14, cl15, cl16, cl17):
    scores = model_selection.cross_val_score(
        clf, X_train, y_train, cv=3, scoring='accuracy')
    # Report the MEAN CV accuracy: the original printed scores.max(), which
    # makes the "+/- std" interval in the same line misleading.
    print("Accuracy: %0.2f (+/- %0.2f) [%s]"
          % (scores.mean(), scores.std(), type(clf).__name__))

模型调参

# Grid search over RandomForestClassifier hyper-parameters.
from sklearn.model_selection import GridSearchCV

# NOTE(review): a grid containing only random_state performs no real tuning;
# consider searching n_estimators / max_depth instead.
# (Removed the unused `penaltys` / `cs` leftovers from the logistic-regression
# example — this grid never referenced them.)
param_grid = {'random_state': [1]}
gsc = GridSearchCV(RandomForestClassifier(), param_grid)
gsc.fit(X_train, y_train)

print('最佳模型参数的评分:', gsc.best_score_)
print('最优参数')
best_params = gsc.best_estimator_.get_params()
print(best_params)
for param_name in sorted(param_grid.keys()):
    print(param_name, ':', best_params[param_name])

常用模型调参

XGBClassifier调参

# Accuracy of the fitted grid-search estimator on the training set.
gsc.score(X_train,y_train)

(图:训练集评分输出截图)

# Accuracy of the fitted grid-search estimator on the held-out test set.
gsc.score(X_test,y_test)

(图:测试集评分输出截图)

from sklearn.model_selection import cross_val_score

# Sweep n_estimators for XGBClassifier and record 15-fold CV accuracy.
# XGBoost requires n_estimators >= 1, so the original range(0, 100) crashed
# on its first iteration. Index 0 gets a 0.0 placeholder so that
# list index == n_estimators still holds for the argmax lookup done later.
acc_list = [0.0]
for i in range(1, 100):
    acc = cross_val_score(
        XGBClassifier(n_estimators=i, min_child_weight=0.5, max_depth=6),
        X_train, y_train, cv=15,
    ).mean()
    acc_list.append(acc)

import matplotlib.pyplot as plt
# The bare 'seaborn' style alias was removed in matplotlib 3.8;
# fall back to the renamed sheet on newer installs.
try:
    plt.style.use('seaborn')
except OSError:
    plt.style.use('seaborn-v0_8')
plt.plot(acc_list)

(图:n_estimators 与交叉验证准确率的关系曲线)

# Locate the best-scoring index from the sweep and inspect the accuracy
# recorded at n_estimators=1 (bare expressions: notebook-style display).
np.argmax(acc_list)
acc_list[1]

# Refit XGBoost with the chosen configuration and report train/test accuracy.
xgb_params = dict(n_estimators=1, min_child_weight=0.5, max_depth=6)
rf = XGBClassifier(**xgb_params)
rf.fit(X_train, y_train)
rf.score(X_train, y_train)
rf.score(X_test, y_test)

极端随机树(ExtraTrees)调参

# TODO: model training and prediction
from sklearn.ensemble import ExtraTreesClassifier

# Extremely-randomized-trees classifier with hand-picked hyper-parameters.
etsc_params = dict(
    class_weight='balanced',
    criterion='gini',
    max_depth=None,
    min_samples_split=3,
    n_estimators=30,
)
etsc = ExtraTreesClassifier(**etsc_params)

# Fit on the training split, predict the test split, and report train accuracy.
etsc.fit(X_train, y_train)
y_pred = etsc.predict(X_test)
print(etsc.score(X_train, y_train))

模型评估

# Estimate generalization with 5 random 70%/20% splits of the training data
# (bare .mean() at the end: notebook-style display of the average score).
from sklearn.model_selection import ShuffleSplit, cross_val_score

cv_split = ShuffleSplit(n_splits=5, train_size=0.7, test_size=0.2)
score_ndarray = cross_val_score(etsc, X_train, y_train, cv=cv_split)
print(score_ndarray)
score_ndarray.mean()

逻辑回归调参

# Logistic-regression hyper-parameter search over penalty type and C.
# (The original import line was truncated — a syntax error.)
from sklearn.model_selection import GridSearchCV

penaltys = ['l1', 'l2']  # L1 or L2 regularization
cs = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5]
param_grid = {'penalty': penaltys, 'C': cs}
# The default 'lbfgs' solver rejects the l1 penalty, which would make the
# grid search error out; 'liblinear' supports both l1 and l2.
gsc = GridSearchCV(LogisticRegression(solver='liblinear'), param_grid)
gsc.fit(X_train, y_train)

print('最佳模型参数的评分:', gsc.best_score_)
print('最优参数')
best_params = gsc.best_estimator_.get_params()
print(best_params)
for param_name in sorted(param_grid.keys()):
    print(param_name, ':', best_params[param_name])
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值