python 决策树使用

决策树使用

工具:Pycharm,win10,Python3.6.4

1.题目要求

根据如下数据使用决策树算法进行预测。

Roles Duration Audiobook Genre
5 80 no Action
15 120 yes Drama
15 100 yes Action
20 80 no Drama
5 80 no Action
12 115 yes ?
2 180 yes ? 

2.Python代码

给出三种属性,判断是哪种类型,这部分我们直接给出代码了。

from math import log
import operator

def calcShannon(dataSet):
    """Return the Shannon entropy of the class labels in dataSet.

    Each sample is a list whose last element is its class label.
    Entropy is computed in bits (log base 2).
    """
    total = len(dataSet)
    counts = {}
    for sample in dataSet:
        label = sample[-1]
        counts[label] = counts.get(label, 0) + 1
    entropy = 0.0
    for count in counts.values():
        p = count / float(total)
        entropy -= p * log(p, 2)
    return entropy

def createDataSet():
    """Return the encoded training samples and the feature names.

    Each sample is [Roles, Duration, Audiobook, class]; all values are
    integer codes for the table in the article (class 0 = Action, 1 = Drama).
    """
    samples = [
        [0, 0, 0, 0],
        [1, 2, 1, 1],
        [1, 1, 1, 0],
        [2, 0, 0, 1],
        [0, 0, 0, 0],
    ]
    featureNames = ['Roles', 'Duration', 'Audiobook']
    return samples, featureNames

def splitDataSet(dataSet, axis, value):
    """Return the samples whose feature at index `axis` equals `value`.

    The matched column is removed from every returned sample; the input
    dataSet is left unmodified.
    """
    return [sample[:axis] + sample[axis + 1:]
            for sample in dataSet
            if sample[axis] == value]

def BestFea(dataSet):
    """Return the index of the feature with the highest information gain.

    dataSet: list of samples, each a list of feature codes ending with the
    class label. Returns -1 when no feature yields a positive gain.
    """
    numFea = len(dataSet[0]) - 1
    baseEntropy = calcShannon(dataSet)
    bestInfo = 0.0
    bestFeature = -1
    for i in range(numFea):
        uniqueVals = set(example[i] for example in dataSet)
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            # Bug fix: the original never accumulated the conditional
            # entropy, so newEntropy stayed 0.0 and every feature's gain
            # equaled baseEntropy (the first feature always won).
            newEntropy += prob * calcShannon(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfo:
            bestInfo = infoGain
            bestFeature = i
    return bestFeature

def majority(classList):
    """Return the most frequent class label in classList.

    Ties are broken by first occurrence in the count ordering. Bug fix:
    the original returned the whole sorted (label, count) list, so tree
    leaves produced by createTree held a list instead of a class label.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def createTree(dataSet, labels, featLabels):
    """Recursively build an ID3 decision tree as nested dicts.

    dataSet: list of samples, each a list of feature codes ending with the
        class label.
    labels: feature names aligned with the feature columns. NOTE: this list
        is mutated in place (the chosen feature is deleted) and the SAME
        list object is shared across sibling recursive calls below — a
        known quirk of this classic implementation; callers should pass a
        copy if they need the original preserved.
    featLabels: output parameter — the chosen feature names are appended in
        split order, later used by classify() to index test vectors.

    Returns a class label (leaf) or a dict {featureName: {value: subtree}}.
    """
    classList = [example[-1] for example in dataSet]
    # Leaf: every remaining sample has the same class.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Leaf: no features left to split on — fall back to majority vote.
    if len(dataSet[0]) == 1 or len(labels) == 0:
        return majority(classList)
    bestFeat = BestFea(dataSet)
    bestFeatLabel = labels[bestFeat]
    featLabels.append(bestFeatLabel)
    myTree = {bestFeatLabel:{}}
    # In-place deletion keeps labels aligned with the reduced columns
    # produced by splitDataSet below.
    del(labels[bestFeat])
    featValues = [example[bestFeat] for example in dataSet]
    uniqueVals = set(featValues)
    for value in uniqueVals:
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), labels, featLabels)
    return myTree


def classify(inputTree, featLabels, testVec):
    """Walk the decision tree and return the predicted label for testVec.

    inputTree: nested-dict tree {featureName: {value: subtree-or-label}}.
    featLabels: feature names in split order (as filled by createTree).
    testVec: feature codes indexed consistently with featLabels.
    Returns '' when testVec's value has no matching branch.
    """
    prediction = ''
    rootFeature = next(iter(inputTree))
    branches = inputTree[rootFeature]
    featIndex = featLabels.index(rootFeature)
    for branchValue, subtree in branches.items():
        if testVec[featIndex] != branchValue:
            continue
        if type(subtree) is dict:
            prediction = classify(subtree, featLabels, testVec)
        else:
            prediction = subtree
    return prediction

if __name__ == '__main__':
    # Build the tree from the training table, then classify the two
    # unlabeled rows. A predicted class code of 1 means Drama; any other
    # result prints Action.
    dataSet, labels = createDataSet()
    featLabels = []
    myTree = createTree(dataSet, labels, featLabels)
    for testVec in ([1, 2, 1], [0, 2, 1]):
        prediction = classify(myTree, featLabels, testVec)
        print('Drama' if prediction == 1 else 'Action')

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值