【随机森林】

#导入必要的包
from sklearn.ensemble import RandomForestClassifier 
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np

#导入数据集
data = load_digits()
x = data.data
y = data.target

RF = RandomForestClassifier(random_state = 66)
score = cross_val_score(RF,x,y,cv=10).mean()
print('交叉验证得分: %.4f'%score)

RF = RandomForestClassifier(random_state = 66)
score = cross_val_score(RF,x,y,cv=10).mean()
print('基尼系数得分: %.4f'%score)
RF = RandomForestClassifier(criterion = 'entropy',random_state = 66)
score = cross_val_score(RF,x,y,cv=10).mean()
print('熵得分: %.4f'%score)

###调n_estimators参数
ScoreAll = []
for i in range(10,200,10):
    DT = RandomForestClassifier(n_estimators = i,random_state = 66) #,criterion = 'entropy'
    score = cross_val_score(DT,data.data,data.target,cv=10).mean()
    ScoreAll.append([i,score])
ScoreAll = np.array(ScoreAll)

max_score = np.where(ScoreAll==np.max(ScoreAll[:,1]))[0][0] ##这句话看似很长的,其实就是找出最高得分对应的索引
print("最优参数以及最高得分:",ScoreAll[max_score])  
plt.figure(figsize=[20,5])
plt.plot(ScoreAll[:,0],ScoreAll[:,1])
plt.show()

###进一步缩小范围,调n_estimators参数
ScoreAll = []
for i in range(100,130):
    DT = RandomForestClassifier(n_estimators = i,random_state = 66)   #criterion = 'entropy',
    score = cross_val_score(DT,data.data,data.target,cv=10).mean()
    ScoreAll.append([i,score])
ScoreAll = np.array(ScoreAll)

max_score = np.where(ScoreAll==np.max(ScoreAll[:,1]))[0][0] ##这句话看似很长的,其实就是找出最高得分对应的索引
print("最优参数以及最高得分:",ScoreAll[max_score])  
plt.figure(figsize=[20,5])
plt.plot(ScoreAll[:,0],ScoreAll[:,1])
plt.show()

###粗调max_depth参数
ScoreAll = []
for i in range(10,30,3):
    DT = RandomForestClassifier(n_estimators = 117,random_state = 66,max_depth =i ) #,criterion = 'entropy'
    score = cross_val_score(DT,data.data,data.target,cv=10).mean()
    ScoreAll.append([i,score])
ScoreAll = np.array(ScoreAll)

max_score = np.where(ScoreAll==np.max(ScoreAll[:,1]))[0][0] ##这句话看似很长的,其实就是找出最高得分对应的索引
print("最优参数以及最高得分:",ScoreAll[max_score])  
plt.figure(figsize=[20,5])
plt.plot(ScoreAll[:,0],ScoreAll[:,1])
plt.show()

###调min_samples_split参数
ScoreAll = []
for i in range(2,10):
    RF = RandomForestClassifier(n_estimators = 117,random_state = 66,max_depth =16,min_samples_split = i ) #,criterion = 'entropy'
    score = cross_val_score(RF,data.data,data.target,cv=10).mean()
    ScoreAll.append([i,score])
ScoreAll = np.array(ScoreAll)

max_score = np.where(ScoreAll==np.max(ScoreAll[:,1]))[0][0] ##这句话看似很长的,其实就是找出最高得分对应的索引
print("最优参数以及最高得分:",ScoreAll[max_score])  
plt.figure(figsize=[20,5])
plt.plot(ScoreAll[:,0],ScoreAll[:,1])
plt.show()

###调min_samples_leaf参数
ScoreAll = []
for i in range(1,15,2):
    DT = RandomForestClassifier(n_estimators = 117,random_state = 66,max_depth =16,min_samples_leaf = i,min_samples_split = 2 ) 
    score = cross_val_score(DT,data.data,data.target,cv=10).mean()
    ScoreAll.append([i,score])
ScoreAll = np.array(ScoreAll)

max_score = np.where(ScoreAll==np.max(ScoreAll[:,1]))[0][0] ##这句话看似很长的,其实就是找出最高得分对应的索引
print("最优参数以及最高得分:",ScoreAll[max_score])  
plt.figure(figsize=[20,5])
plt.plot(ScoreAll[:,0],ScoreAll[:,1])
plt.show()

有条件的话,最后再进行进一步的网格搜索调参,取最优的几个参数

import time
start = time.time()

param_grid = {
  'n_estimators':np.arange(140, 150),
  'max_depth':np.arange(15, 18),
  'min_samples_leaf':np.arange(1, 8),
  'min_samples_split':np.arange(2, 5),
   'max_features':np.arange(0.1, 1)
}

rfc = RandomForestClassifier(random_state=66)
GS = GridSearchCV(rfc,param_grid,cv=10)
GS.fit(data.data,data.target)
end = time.time()
print("循环运行时间:%.2f秒"%(end-start))
print(GS.best_params_)
print(GS.best_score_)

另一种调参方式,用网格搜索来依次调参,下面是示例

#调max_features参数
param_grid = {
    'max_features':np.arange(0.1, 1)}

rfc = RandomForestClassifier(random_state=66,n_estimators = 117,max_depth = 16,min_samples_leaf =1 ,min_samples_split =4 )
GS = GridSearchCV(rfc,param_grid,cv=10)
GS.fit(data.data,data.target)
print(GS.best_params_)
print(GS.best_score_)

机器学习建模通用模板
代码参考博客

https://blog.csdn.net/u013044310/article/details/104045284?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2defaultCTRLISTdefault-1-104045284-blog-80337103.pc_relevant_aa_2&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2defaultCTRLISTdefault-1-104045284-blog-80337103.pc_relevant_aa_2&utm_relevant_index=2

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

独孤剑客6

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值