第九章 回归树@TOC
回归树中的错误讲解:math.pow 不能直接对矩阵进行操作
import math
import numpy as np
import matplotlib.pyplot as plt
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats into a list of rows.

    Each non-blank line becomes one list of floats, in file order. The
    last column of every row is assumed to be the target value.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list of rows, each row being a list of floats.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted with float().
    """
    dataMat = []
    # The context manager closes the handle even if float() raises midway.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line carries no sample; skip it
                # instead of failing on float('').
                continue
            dataMat.append([float(field) for field in stripped.split('\t')])
    return dataMat
def binSplitDataSet(dataMat, feature, value):
    """Split a dataset into two row subsets by thresholding one column.

    Args:
        dataMat: 2-D dataset indexed as dataMat[:, feature]; presumably a
            NumPy array or matrix with one sample per row (confirm against
            the caller).
        feature: Index of the column to compare.
        value: Threshold the column is compared against.

    Returns:
        A tuple (mat0, mat1): mat0 holds the rows whose feature column is
        greater than value, mat1 the rows where it is less than or equal
        to value.
    """
    column = dataMat[:, feature]
    # np.nonzero returns one index array per dimension; element [0] is the
    # row indices of the entries that satisfy the comparison.
    mat0 = dataMat[np.nonzero(column > value)[0]]
    mat1 = dataMat[np.nonzero(column <= value)[0]]
    return mat0, mat1
def regLeaf(dataMat):
    """Build the value of a leaf node: the mean of the last column.

    Args:
        dataMat: 2-D dataset indexed as dataMat[:, -1]; the last column
            is treated as the target value.

    Returns:
        The mean of the last column, as computed by np.mean.
    """
    return np.mean(dataMat[:, -1])
def regErr(dataMat):
    """Compute the error of a dataset as the total variance of its targets.

    Total variance = variance of the last column * number of samples
    (rows).

    Args:
        dataMat: 2-D dataset exposing .shape and indexed as
            dataMat[:, -1]; the last column is treated as the target
            value.

    Returns:
        np.var of the last column multiplied by the number of rows.
    """
    return np.var(dataMat[:, -1]) * dataMat.shape[0]
'''找到数据集切分的最佳位置
'''
def chooseBestFeature(dataMat,leafType = regLeaf,errType = regErr,ops = (1,4)):
tolS=ops[0];tolN=ops[1]
if len(set(dataMat[:,-1].T.tolist()[0])