类别:个人机器学习笔记(数学推导见上传的手写PDF)
书籍参考:《机器学习实战》、《统计学习》、《机器学习》
任务:结合离散决策树的判别准则和连续型决策树的特征选取方法,生成下面数据的连续型分类树
#调用一些包
import numpy as np
import math
import matplotlib.pyplot as plt
# Use the SimHei font for sans-serif text so Chinese characters in plots render.
plt.rcParams['font.sans-serif']=['SimHei']
# Draw minus signs with an ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False # display Chinese text correctly
#连续型数据分类
def createDataXG():
    """Build the small continuous-valued melon data set used for the tree.

    Returns:
        tuple: ``(data, label, feature)`` where

        * ``data`` is a ``(17, 2)`` float array, one row per sample, with
          columns in the order given by ``feature``;
        * ``label`` is a length-17 string array: '好瓜' (good melon) for the
          first 8 rows and '坏瓜' (bad melon) for the remaining 9;
        * ``feature`` is a length-2 string array holding the column names
          '密度' (density) and '含糖量' (sugar content).
    """
    data = np.array([
        [0.697, 0.460],
        [0.774, 0.376],
        [0.634, 0.264],
        [0.608, 0.318],
        [0.556, 0.215],
        [0.403, 0.237],
        [0.481, 0.149],
        [0.437, 0.211],
        [0.666, 0.091],
        [0.243, 0.267],
        [0.245, 0.057],
        [0.343, 0.099],
        [0.639, 0.161],
        [0.657, 0.198],
        [0.360, 0.370],
        [0.593, 0.042],
        [0.719, 0.103],
    ])
    # Rows are ordered by class: 8 good melons first, then 9 bad melons.
    label = np.array(['好瓜'] * 8 + ['坏瓜'] * 9)
    feature = np.array(['密度', '含糖量'])
    return data, label, feature
#定义所需要的函数
#定义基尼
def Gini(feature,x):
featuresortedlist = sorted(feature)
bounds = [featuresortedlist[i]/2+featuresortedlist[i+1]