本文是KFold应用的一个实例:基于鸢尾花数据做5折交叉验证,从而寻找得分最高的最优树深。
导入相应包:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_iris
from lightgbm import LGBMClassifier
加载鸢尾花数据:
# Load the built-in iris dataset: x is the feature matrix, y the class labels.
iris = load_iris()
x = iris.data
y = iris.target
通过交叉验证测试最优树深,从1-10查找,最后选择得分最高的树深。
# 5-fold splitter with shuffling. Fixing random_state makes the shuffled fold
# assignment reproducible across runs — the model below already pins seed=0,
# so without this the splits were the only remaining source of run-to-run
# variance in the reported scores.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# Candidate tree depths to evaluate: 1 through 10 inclusive.
max_depths = range(1, 11)

# Accumulators filled further down (usage is past this excerpt):
# result presumably collects per-depth scores, dit maps depth -> score — confirm.
result = []
dit = {}
for max_depth in max_depths:
for k,(train,test) in enumerate(kf.split(x,y)):
test_score=[]
x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
print("train_split_rate:",len(x_train)/len(x))
clg = LGBMClassifier(
objective="multiclass",
boosting="gbdt",
learning_rate=0.1,
max_depth=max_depth,
n_estimators=100,
num_leaves=31,
lambda_l1=0.1,
lambda_l2=0.1,
seed=0
)
clg.fit(x_train,y_train,eval_set=[(x_test, y_test)],verbose=-1)
print("第%s轮验证:"%(k+1))
print("本轮训练集得分:%.2f%%"%(clg.score(x_train,y_tra