#导入必要的包
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
# Load the scikit-learn handwritten-digits dataset: feature matrix and labels.
data = load_digits()
x = data.data
y = data.target

# Baseline: random forest with default hyperparameters and a fixed seed,
# scored as the mean of a 10-fold cross-validation (cv=10).
RF = RandomForestClassifier(random_state=66)
score = cross_val_score(RF, x, y, cv=10).mean()
print('交叉验证得分: %.4f' % score)

# The default split criterion is Gini impurity, so the "Gini" run uses exactly
# the same estimator configuration, seed and data as the baseline above.
# Reuse that score instead of repeating the whole cross-validation.
print('基尼系数得分: %.4f' % score)

# Same forest, but with entropy as the split criterion, for comparison.
RF = RandomForestClassifier(criterion='entropy', random_state=66)
score = cross_val_score(RF, x, y, cv=10).mean()
print('熵得分: %.4f' % score)
# Coarse sweep of n_estimators over 10, 20, ..., 190; every other
# hyperparameter is left at its default.
ScoreAll = []
for i in range(10, 200, 10):
    DT = RandomForestClassifier(n_estimators=i, random_state=66)
    # Mean score of a 10-fold cross-validation on the full dataset.
    score = cross_val_score(DT, data.data, data.target, cv=10).mean()
    ScoreAll.append([i, score])

# 2-D array whose rows are [parameter value, mean CV score].
ScoreAll = np.array(ScoreAll)

# Row index of the highest score (first one on ties). Only the score column
# is inspected, so a parameter value that happens to equal the best score can
# never be selected by mistake.
max_score = np.argmax(ScoreAll[:, 1])
print("最优参数以及最高得分:", ScoreAll[max_score])

# Score as a function of the parameter value.
plt.figure(figsize=[20, 5])
plt.plot(ScoreAll[:, 0], ScoreAll[:, 1])
plt.show()
# Fine sweep of n_estimators: every integer from 100 to 129, narrowing the
# range around the region of interest.
ScoreAll = []
for i in range(100, 130):
    DT = RandomForestClassifier(n_estimators=i, random_state=66)
    # Mean score of a 10-fold cross-validation on the full dataset.
    score = cross_val_score(DT, data.data, data.target, cv=10).mean()
    ScoreAll.append([i, score])

# 2-D array whose rows are [parameter value, mean CV score].
ScoreAll = np.array(ScoreAll)

# Row index of the highest score (first one on ties). Only the score column
# is inspected, so a parameter value that happens to equal the best score can
# never be selected by mistake.
max_score = np.argmax(ScoreAll[:, 1])
print("最优参数以及最高得分:", ScoreAll[max_score])

# Score as a function of the parameter value.
plt.figure(figsize=[20, 5])
plt.plot(ScoreAll[:, 0], ScoreAll[:, 1])
plt.show()
# Coarse sweep of max_depth over 10, 13, ..., 28 with n_estimators fixed at 117
# (presumably the best value from the n_estimators sweep -- confirm against
# its printed output).
ScoreAll = []
for i in range(10, 30, 3):
    DT = RandomForestClassifier(n_estimators=117, random_state=66, max_depth=i)
    # Mean score of a 10-fold cross-validation on the full dataset.
    score = cross_val_score(DT, data.data, data.target, cv=10).mean()
    ScoreAll.append([i, score])

# 2-D array whose rows are [parameter value, mean CV score].
ScoreAll = np.array(ScoreAll)

# Row index of the highest score (first one on ties). Only the score column
# is inspected, so a parameter value that happens to equal the best score can
# never be selected by mistake.
max_score = np.argmax(ScoreAll[:, 1])
print("最优参数以及最高得分:", ScoreAll[max_score])

# Score as a function of the parameter value.
plt.figure(figsize=[20, 5])
plt.plot(ScoreAll[:, 0], ScoreAll[:, 1])
plt.show()
# Sweep min_samples_split over 2..9 with n_estimators=117 and max_depth=16
# held fixed (presumably the values chosen in the earlier sweeps -- confirm).
ScoreAll = []
for i in range(2, 10):
    RF = RandomForestClassifier(
        n_estimators=117,
        random_state=66,
        max_depth=16,
        min_samples_split=i,
    )
    # Mean score of a 10-fold cross-validation on the full dataset.
    score = cross_val_score(RF, data.data, data.target, cv=10).mean()
    ScoreAll.append([i, score])

# 2-D array whose rows are [parameter value, mean CV score].
ScoreAll = np.array(ScoreAll)

# Row index of the highest score (first one on ties). Only the score column
# is inspected, so a parameter value that happens to equal the best score can
# never be selected by mistake.
max_score = np.argmax(ScoreAll[:, 1])
print("最优参数以及最高得分:", ScoreAll[max_score])

# Score as a function of the parameter value.
plt.figure(figsize=[20, 5])
plt.plot(ScoreAll[:, 0], ScoreAll[:, 1])
plt.show()
# Sweep min_samples_leaf over 1, 3, ..., 13 with n_estimators=117,
# max_depth=16 and min_samples_split=2 held fixed.
ScoreAll = []
for i in range(1, 15, 2):
    DT = RandomForestClassifier(
        n_estimators=117,
        random_state=66,
        max_depth=16,
        min_samples_leaf=i,
        min_samples_split=2,
    )
    # Mean score of a 10-fold cross-validation on the full dataset.
    score = cross_val_score(DT, data.data, data.target, cv=10).mean()
    ScoreAll.append([i, score])

# 2-D array whose rows are [parameter value, mean CV score].
ScoreAll = np.array(ScoreAll)

# Row index of the highest score (first one on ties). Only the score column
# is inspected: the parameter value 1 tested here would otherwise be
# indistinguishable from a perfect score of 1.0.
max_score = np.argmax(ScoreAll[:, 1])
print("最优参数以及最高得分:", ScoreAll[max_score])

# Score as a function of the parameter value.
plt.figure(figsize=[20, 5])
plt.plot(ScoreAll[:, 0], ScoreAll[:, 1])
plt.show()
# If resources permit, finish with a further grid search to tune and select the best few parameters.
# Joint grid search over several hyperparameters at once, timed end to end.
import time

start = time.time()
param_grid = {
    'n_estimators': np.arange(140, 150),
    'max_depth': np.arange(15, 18),
    'min_samples_leaf': np.arange(1, 8),
    'min_samples_split': np.arange(2, 5),
    # Feature fractions 0.1, 0.2, ..., 0.9. np.arange(0.1, 1) uses an implicit
    # step of 1 and therefore produced the single value 0.1, so max_features
    # was never actually searched.
    'max_features': np.linspace(0.1, 0.9, 9),
}
# NOTE: the grid holds 10 * 3 * 7 * 3 * 9 = 5670 candidates, each evaluated
# with 10-fold cross-validation; expect a long run time.
rfc = RandomForestClassifier(random_state=66)
GS = GridSearchCV(rfc, param_grid, cv=10)
GS.fit(data.data, data.target)
end = time.time()

print("循环运行时间:%.2f秒" % (end - start))
# Best parameter combination and its mean cross-validated score.
print(GS.best_params_)
print(GS.best_score_)
# Another tuning approach: use grid search to tune the parameters one at a time; an example follows.
# Tune max_features on its own with a grid search; the other hyperparameters
# are held fixed.
param_grid = {
    # Feature fractions 0.1, 0.2, ..., 0.9. np.arange(0.1, 1) uses an implicit
    # step of 1 and therefore produced the single value 0.1, which made this
    # search evaluate only one candidate.
    'max_features': np.linspace(0.1, 0.9, 9),
}
rfc = RandomForestClassifier(
    random_state=66,
    n_estimators=117,
    max_depth=16,
    min_samples_leaf=1,
    min_samples_split=4,
)
GS = GridSearchCV(rfc, param_grid, cv=10)
GS.fit(data.data, data.target)

# Best max_features value and its mean cross-validated score.
print(GS.best_params_)
print(GS.best_score_)
# https://blog.csdn.net/u013044310/article/details/104045284?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2defaultCTRLISTdefault-1-104045284-blog-80337103.pc_relevant_aa_2&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2defaultCTRLISTdefault-1-104045284-blog-80337103.pc_relevant_aa_2&utm_relevant_index=2