#-*-coding:utf-8-*-
from math import log
import operator
def calcShanonEnt(dataSet):
    '''
    Compute the Shannon entropy (in bits) of the class labels in a data set.

    :param dataSet: sized sequence of records; the last element of each
        record is used as its class label
    :return: the entropy as a float; 0.0 when dataSet is empty or when
        every record carries the same label
    '''
    numEntries = len(dataSet)

    # Tally how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # H = -sum(p * log2(p)) over the observed labels.
    shanonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps this a true division under Python 2 as well.
        prob = float(count) / numEntries
        shanonEnt -= prob * log(prob, 2)
    return shanonEnt
def splitDataSet(dataSet,axis,value):
'''
按照给特定特征划分数据集
:param dataSet:
:param axis:
:param value:
:return:
'''
retDataSet=[]
for featVec in dataSet:
if featVec[axis] == value:
reducedFeatVec = featVec[:axis]
reducedFeatVec.extend(featVec[ax
使用 Python 对决策树算法进行学习
最新推荐文章于 2023-11-06 21:54:44 发布