交叉验证获得最佳二元决策树深度

最新推荐文章于 2024-03-11 17:20:40 发布

Istaroth

最新推荐文章于 2024-03-11 17:20:40 发布

阅读量1.3k

点赞数

分类专栏：算法

算法专栏收录该内容

51 篇文章 0 订阅

订阅专栏

使用10折交叉验证计算各个深度下的均方误差（MSE），然后比较各深度的误差，看哪个深度对预测有明显的优势。

# -*- coding:utf-8 -*-
import numpy
import matplotlib.pyplot as plot
from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.externals.six import StringIO

# Build a simple synthetic data set: y = x + gaussian noise
npoints = 100

# npoints+1 evenly spaced x values running from -0.5 to 0.5 inclusive
xplot = [float(i) / npoints - 0.5 for i in range(npoints + 1)]

# The regressor expects one row per sample, so wrap each value in its own list
x = []
for s in xplot:
    x.append([s])

# Seed the generator for reproducibility, then add noise (std 0.1) to each x
numpy.random.seed(1)
y = []
for s in xplot:
    y.append(s + numpy.random.normal(scale=0.1))

# Cross-validation configuration and the list collecting one MSE per depth
nrow = len(x)
depthlist = list(range(1, 8))
xvalmse = []
nxval = 10

# Evaluate every candidate tree depth with nxval-fold cross-validation
for idepth in depthlist:
    # Running sum of squared out-of-sample errors for this depth
    ooserrors = 0.0

    # With the depth fixed, hold out each fold exactly once
    for ixval in range(nxval):
        # Row a belongs to fold a % nxval, so the held-out rows are
        # every nxval-th row starting at ixval; the rest are for training
        itest = list(range(ixval, nrow, nxval))
        itrain = [a for a in range(nrow) if a % nxval != ixval]

        xtrain = [x[r] for r in itrain]
        ytrain = [y[r] for r in itrain]
        xtest = [x[r] for r in itest]
        ytest = [y[r] for r in itest]

        # Fit a regression tree limited to the current depth
        treemodel = DecisionTreeRegressor(max_depth=idepth)
        treemodel.fit(xtrain, ytrain)

        # Predict the held-out rows
        treeprediction = treemodel.predict(xtest)

        # Residuals on the held-out rows (actual minus predicted)
        error = [actual - predicted
                 for actual, predicted in zip(ytest, treeprediction)]

        # Accumulate the squared error over all folds
        ooserrors += sum(e * e for e in error)

    # Every row is held out exactly once, so dividing by nrow gives the MSE
    xvalmse.append(ooserrors / nrow)

# Plot the cross-validated MSE against tree depth
plot.plot(depthlist, xvalmse)
plot.axis("tight")
plot.ylabel("mse")
plot.xlabel("depth")
plot.show()