python举例cart回归树_CART回归树&模型树 生成 剪枝 in Python

该博客介绍如何在Python中实现CART回归树,包括数据预处理、树的生成、剪枝以及预测功能。通过regLeaf和modelLeaf函数创建叶节点,使用chooseBestSplit选择最佳分割特征和值,并通过createTree构建决策树。还涵盖了后剪枝优化和预测评估。
摘要由CSDN通过智能技术生成

from numpy import *

def loadDataSet(fileName):
    """Load a tab-delimited data file into a list of float rows.

    Each line becomes a list of floats; by convention the last column
    is the target value. Convert with mat() before building a tree.
    """
    dataMat = []
    # 'with' guarantees the file handle is closed even on error
    with open(fileName) as fr:
        for line in fr:
            currLine = line.strip().split('\t')
            # Py2 map() returned a list; in Py3 it is a lazy iterator,
            # so materialize explicitly to keep dataMat a list of lists
            fltLine = [float(tok) for tok in currLine]
            dataMat.append(fltLine)
    return dataMat

### Preparing for creating tree

# the function regleaf and modelleaf is going to create the leafnodes

def regLeaf(dataSet):
    """Leaf generator for regression trees: mean of the target column."""
    targets = dataSet[:, -1]
    return mean(targets)

def regErr(dataSet):
    """Total squared error of the target column.

    Used for measuring the uniformity ("chaos") of the data in a node:
    population variance of the last column scaled by the sample count.
    """
    numSamples = shape(dataSet)[0]
    return var(dataSet[:, -1]) * numSamples

def linearSolve(dataSet):
    """Fit an ordinary-least-squares linear model to dataSet.

    Splits dataSet into X (features, with a leading bias column of 1s)
    and Y (last column, the target), then solves the normal equations.

    Returns:
        ws: (n x 1) weight matrix; ws[0] is the intercept.
        X, Y: the formatted feature/target matrices.

    Raises:
        NameError: if X.T * X is singular (NameError kept, not ValueError,
        for backward compatibility with existing callers).
    """
    N, n = shape(dataSet)
    X = mat(ones((N, n)))
    Y = mat(ones((N, 1)))
    X[:, 1:n] = dataSet[:, 0:n - 1]  # column 0 stays 1.0 (bias term)
    Y = dataSet[:, -1]
    xTx = X.T * X
    if linalg.det(xTx) == 0.0:
        # fixed: the original's backslash-continued string literal was broken
        raise NameError('This matrix is singular, cannot do inverse; '
                        'try increasing the second value of ops')
    ws = xTx.I * (X.T * Y)
    return ws, X, Y

def modelLeaf(dataSet):
    """Leaf generator for model trees: linear-model weights for this node."""
    weights, _, _ = linearSolve(dataSet)
    return weights

def modelErr(dataSet):
    """Error measure for model trees: sum of squared residuals of the fit."""
    ws, X, Y = linearSolve(dataSet)
    predictions = X * ws
    residuals = Y - predictions
    return sum(power(residuals, 2))

### Creating tree

def binSplitDataSet(dataSet, feature, value):
    """Split dataSet rows on one feature against a threshold.

    Returns (mat0, mat1): rows where feature > value, and rows where
    feature <= value, respectively.
    """
    greaterRows = nonzero(dataSet[:, feature] > value)[0]
    lesserRows = nonzero(dataSet[:, feature] <= value)[0]
    return dataSet[greaterRows, :], dataSet[lesserRows, :]

def chooseBestSplit(dataSet, leafType, errType, ops):
    """Find the best (feature, value) pair to binary-split dataSet on.

    Exhaustively tries every value of every feature, scoring splits with
    errType. Returns (None, leaf) when no worthwhile split exists.

    Args:
        leafType: function building a leaf value from a dataSet.
        errType: function measuring a dataSet's error.
        ops: (tolS, tolN) pre-pruning knobs.
    """
    tolS = ops[0]  # minimum error reduction required to accept a split
    tolN = ops[1]  # minimum number of samples allowed in a split
    # all target values identical -> nothing left to split on
    if len(set(dataSet[:, -1].T.tolist()[0])) == 1:
        return None, leafType(dataSet)
    N, n = shape(dataSet)
    S = errType(dataSet)
    bestS = inf
    bestIndex = 0
    bestValue = 0
    for featIndex in range(n - 1):
        for splitVal in set(dataSet[:, featIndex].T.tolist()[0]):
            mat0, mat1 = binSplitDataSet(dataSet, featIndex, splitVal)
            # NOTE(review): the '<' comparisons below were eaten by HTML
            # stripping in the original; reconstructed as the standard
            # minimum-samples guard -- confirm against the reference code.
            if (shape(mat0)[0] < tolN) or (shape(mat1)[0] < tolN):
                continue
            newS = errType(mat0) + errType(mat1)
            if newS < bestS:
                bestIndex = featIndex
                bestValue = splitVal
                bestS = newS
    # best split does not reduce error enough -> make a leaf
    if (S - bestS) < tolS:
        return None, leafType(dataSet)
    mat0, mat1 = binSplitDataSet(dataSet, bestIndex, bestValue)
    if (shape(mat0)[0] < tolN) or (shape(mat1)[0] < tolN):
        return None, leafType(dataSet)
    return bestIndex, bestValue

def createTree(dataSet, leafType, errType, ops):
    """Recursively build a CART tree (regression or model tree).

    leafType generates leaf values, errType measures node error, and
    ops = (tolS, tolN) controls pre-pruning. Returns either a leaf value
    or a dict with keys 'spInd', 'spVal', 'left', 'right'.
    """
    feat, val = chooseBestSplit(dataSet, leafType, errType, ops)
    if feat is None:  # fixed: identity check, not '== None'
        return val
    retTree = {}
    retTree['spInd'] = feat
    retTree['spVal'] = val
    lSet, rSet = binSplitDataSet(dataSet, feat, val)
    retTree['left'] = createTree(lSet, leafType, errType, ops)
    retTree['right'] = createTree(rSet, leafType, errType, ops)
    return retTree

### Post Purning

def isTree(obj):
    """Return True if obj is an internal tree node (a dict), not a leaf."""
    # isinstance is the idiomatic (and subclass-safe) form of the
    # original type(obj).__name__ == 'dict' check
    return isinstance(obj, dict)

def getMean(tree):
    """Collapse a subtree to one value: the mean of its two (collapsed) children."""
    for side in ('right', 'left'):
        if isTree(tree[side]):
            tree[side] = getMean(tree[side])
    return (tree['right'] + tree['left']) / 2.0

def postPurning(tree, testData):
    """Post-prune a tree against held-out testData (reduced-error pruning).

    Recursively prunes children first, then merges two sibling leaves
    into their mean whenever merging lowers squared error on testData.
    (Function name -- "Purning" -- kept as-is for caller compatibility.)
    Returns the pruned tree, or a single leaf value if fully collapsed.
    """
    if shape(testData)[0] == 0:
        # no test samples reach this node: collapse the whole subtree
        return getMean(tree)
    if isTree(tree['right']) or isTree(tree['left']):
        lSet, rSet = binSplitDataSet(testData, tree['spInd'], tree['spVal'])
        if isTree(tree['left']):
            tree['left'] = postPurning(tree['left'], lSet)
        if isTree(tree['right']):
            tree['right'] = postPurning(tree['right'], rSet)
    if not isTree(tree['left']) and not isTree(tree['right']):
        lSet, rSet = binSplitDataSet(testData, tree['spInd'], tree['spVal'])
        errNoMerge = (sum(power(lSet[:, -1] - tree['left'], 2)) +
                      sum(power(rSet[:, -1] - tree['right'], 2)))
        treeMean = (tree['left'] + tree['right']) / 2.0
        errMerge = sum(power(testData[:, -1] - treeMean, 2))
        if errMerge < errNoMerge:
            print("merging")  # fixed: Py3 print function (was Py2 statement)
            return treeMean
        else:
            return tree
    else:
        return tree

### Predicting

def regTreeEval(model, inData):
    """Evaluate a regression-tree leaf: the stored value itself (inData unused)."""
    return float(model)

def modelTreeEval(model, inData):

n = shape(inData)[1]

X = mat(zeros((1,n+1)))

X[:,1:n+1] = inData

return float(X*model)

def treeForecast(tree, inData, treeEval):
    """Predict one sample by walking the tree; treeEval evaluates leaves.

    Follows the same '>' goes-left / '<=' goes-right convention used by
    binSplitDataSet when the tree was built.
    """
    if not isTree(tree):
        return treeEval(tree, inData)
    # NOTE(review): inData is indexed as inData[tree['spInd']]; this
    # assumes inData is a 1-row matrix/sequence -- confirm against callers.
    if inData[tree['spInd']] > tree['spVal']:
        if isTree(tree['left']):
            # fixed: original had a stray ':' after this return statement
            return treeForecast(tree['left'], inData, treeEval)
        else:
            return treeEval(tree['left'], inData)
    else:
        if isTree(tree['right']):
            # fixed: original had a stray ':' after this return statement
            return treeForecast(tree['right'], inData, treeEval)
        else:
            return treeEval(tree['right'], inData)

def createForecast(tree, testData, treeEval):
    """Predict every row of testData; returns an (M x 1) matrix of forecasts."""
    M = len(testData)
    # fixed: 'zeors' typo and missing shape tuple in zeros((M, 1))
    yHat = mat(zeros((M, 1)))
    for ii in range(M):
        yHat[ii, 0] = treeForecast(tree, mat(testData[ii]), treeEval)
    return yHat

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值