一 目标:使用决策树进行分类
二 评价属性
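1、信息增益(ID3):$\mathrm{Gain}(D, a) = \mathrm{Ent}(D) - \sum_{v=1}^{V} \frac{|D^v|}{|D|}\,\mathrm{Ent}(D^v)$,其中信息熵 $\mathrm{Ent}(D) = -\sum_{k} p_k \log_2 p_k$,$D^v$ 为属性 $a$ 取第 $v$ 个值的样本子集。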
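2、信息增益率(C4.5):$\mathrm{Gain\_ratio}(D, a) = \dfrac{\mathrm{Gain}(D, a)}{\mathrm{IV}(a)}$,其中 $\mathrm{Gain}(D, a)$ 为属性 $a$ 的信息增益,$\mathrm{IV}(a) = -\sum_{v=1}^{V} \frac{|D^v|}{|D|} \log_2 \frac{|D^v|}{|D|}$。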
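3、基尼指数:$\mathrm{Gini}(D) = 1 - \sum_{k} p_k^2$,属性 $a$ 的基尼指数为 $\mathrm{Gini\_index}(D, a) = \sum_{v=1}^{V} \frac{|D^v|}{|D|}\,\mathrm{Gini}(D^v)$。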
三 代码
import numpy as np
from math import log
message_range = [4, 4, 4, 3, 3, 3] # number of distinct values each of the six features can take
# Meaning of the numeric values in each column of the matrix
# (feature index -> {raw string value: integer code}).
# The mapping is kept commented out as reference only; it is not executed.
# Note that the size of each dict matches the corresponding entry of message_range.
# global message_to_num = [{},{},{},{},{},{}]
# message_to_num[0] = {'low':0, 'med':1, 'high':2, 'vhigh':3}
# message_to_num[1] = message_to_num[0]
# message_to_num[2] = {'2':0, '3':1, '4':2, '5more':3}
# message_to_num[3] = {'2':0, '4':1, 'more':2}
# message_to_num[4] = {'small':0, 'med':1, 'big':2}
# message_to_num[5] = {'low':0, 'med':1, 'high':2}
def readMat(file):
length = len(file.readlines())
file.seek(0, 0)
mat = np.zeros((length, 7), int)
i = 0
for content in file.readlines():
content = content[:-1]
arr = content.split(&#