机器学习--python实现CART分类树--原生代码

import csv
from random import shuffle

# Read train.csv and test.csv
# Training rows: columns 0 and 1 are parsed as floats and column 2 as an int
# (later code reads the last column of a training row as its class label).
train = []
# newline='' leaves newline handling to the csv module, as its documentation recommends.
with open('D:\\train.csv', 'rt', newline='') as f:
    csvread = csv.reader(f)
    for i in csvread:
        if not i:
            # csv.reader yields [] for a blank line; skip it instead of failing on i[0].
            continue
        i[0] = float(i[0])
        i[1] = float(i[1])
        i[2] = int(i[2])
        train.append(i)

# Test rows: columns 0 and 1 are parsed as floats; any other columns stay as read.
test = []
with open('D:\\test.csv', 'rt', newline='') as f:
    csvread = csv.reader(f)
    for i in csvread:
        if not i:
            continue
        i[0] = float(i[0])
        i[1] = float(i[1])
        test.append(i)

# Split the training set 4:1 into an estimation set (gj) and a validation set (yz).
# Each class is shuffled and split separately, so both sets keep the class ratio.
train0 = []
train1 = []
for sample in train:
    # Label 0 goes to train0; every other label goes to train1.
    (train0 if sample[-1] == 0 else train1).append(sample)

# One fifth of each class (rounded down) is held out for validation.
l0 = len(train0) // 5
l1 = len(train1) // 5
shuffle(train0)
shuffle(train1)
yz = train0[:l0] + train1[:l1]
gj = train0[l0:] + train1[l1:]

# Gini impurity
def Gini(D):
    """Return the Gini impurity of a binary-labelled sample set.

    The last element of each sample is its class label. Label ``0`` forms one
    class; every other label is counted as the second class.

    Args:
        D: Sequence of samples (indexable rows whose last element is the label).

    Returns:
        ``1 - p0**2 - p1**2`` where ``p0`` and ``p1`` are the proportions of
        the two classes in ``D``. An empty ``D`` yields ``0.0``.
    """
    total = len(D)
    if total == 0:
        # No samples means nothing to mix; avoid dividing by zero.
        return 0.0
    zeros = sum(1 for d in D if d[-1] == 0)
    p0 = zeros / total
    p1 = (total - zeros) / total
    # Both proportions must be squared (the second one used to be divided
    # by total**2 instead of being squared).
    return 1 - p0 ** 2 - p1 ** 2

# Partition a sample set on a given feature and threshold
def subD(D, n, value):
    """Split ``D`` in two by comparing feature ``n`` of each sample with ``value``.

    Args:
        D: Iterable of indexable samples.
        n: Index of the feature to compare.
        value: Threshold for the comparison.

    Returns:
        ``(Dl, Dr)``: the samples with ``sample[n] <= value`` and all remaining
        samples, each list in the original order.
    """
    left, right = [], []
    for row in D:
        target = left if row[n] <= value else right
        target.append(row)
    return left, right


# Pick the best split point
def best_feature(D):
    """Search features 0 and 1 for the threshold with the lowest weighted Gini index.

    Every value that feature 0 or feature 1 takes in ``D`` is tried as a
    threshold. Splits that leave one side empty are skipped.

    Args:
        D: List of samples (two features followed by the class label).

    Returns:
        ``(a, s, gini)``: the feature index, the threshold and the weighted
        Gini index of the best split found. When no split is valid, the
        initial placeholders ``(0, 0, 10)`` are returned.
    """
    best_index, best_threshold, best_score = 0, 0, 10
    for feature in (0, 1):
        for row in D:
            threshold = row[feature]
            left, right = subD(D, feature, threshold)
            if not left or not right:
                continue
            # Gini of both sides, each weighted by its share of the samples.
            score = (len(left) / len(D)) * Gini(left) + (len(right) / len(D)) * Gini(right)
            if score < best_score:
                best_index, best_threshold, best_score = feature, threshold, score
    return best_index, best_threshold, best_score

# Tree node definition
class Node:
    """A node of the CART tree.

    Attributes:
        D: Samples stored at this node.
        n: Index of the feature used for the split at this node.
        value: Threshold used for the split at this node.
        lchild: Left child node, or ``None``.
        rchild: Right child node, or ``None``.
    """

    def __init__(self, D, n, value, lchild=None, rchild=None):
        self.D, self.n, self.value = D, n, value
        self.lchild, self.rchild = lchild, rchild

# Build the CART tree
def createtree(D, min_samples=200):
    """Recursively build a CART classification tree from the samples ``D``.

    Every node stores its samples together with the best split found by
    ``best_feature``. A node is split further only while it holds more than
    ``min_samples`` samples and the split puts samples on both sides.

    Args:
        D: List of training samples (features followed by the class label).
        min_samples: A node with at most this many samples becomes a leaf.

    Returns:
        The root ``Node`` of the (sub)tree built from ``D``.
    """
    n, value, _ = best_feature(D)
    node = Node(D, n, value)
    if len(D) > min_samples:
        Dl, Dr = subD(D, n, value)
        # best_feature returns a placeholder split when no threshold separates
        # the samples (e.g. all feature values are identical). One side would
        # then be D itself and the recursion would never terminate, so such a
        # node is kept as a leaf.
        if Dl and Dr:
            node.lchild = createtree(Dl, min_samples)
            node.rchild = createtree(Dr, min_samples)
    return node

# Evaluate the CART tree on a labelled sample set
def testtree(tree, test):
    """Return the fraction of samples in ``test`` that the tree classifies correctly.

    Each sample is routed from the root until it reaches a node that lacks a
    child. The predicted label is the majority class among that node's stored
    samples: ``0`` if label-0 samples are at least as many as the others,
    otherwise ``1``.

    Args:
        tree: Root node exposing ``D``, ``n``, ``value``, ``lchild``, ``rchild``.
        test: Sequence of samples whose last element is the true label.

    Returns:
        The accuracy as a float, or ``0.0`` when ``test`` is empty.
    """
    if not test:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0
    rig = 0
    for sample in test:
        node = tree
        while node.lchild is not None and node.rchild is not None:
            node = node.lchild if sample[node.n] <= node.value else node.rchild
        zeros = sum(1 for row in node.D if row[-1] == 0)
        # Ties go to class 0.
        predicted = 0 if zeros >= len(node.D) - zeros else 1
        if predicted == sample[-1]:
            rig += 1
    return rig / len(test)


# Fit the tree on the estimation set and report its accuracy on the validation set.
tree = createtree(gj)
accuracy = testtree(tree, yz)
print('预测的正确率:', round(accuracy, 4))

def pre_test(tree, test):
    """Append the tree's predicted label to every sample in ``test``, in place.

    Each sample is routed from the root until it reaches a node that lacks a
    child. The label appended is the majority class among that node's stored
    samples: ``0`` if label-0 samples are at least as many as the others,
    otherwise ``1``.

    Args:
        tree: Root node exposing ``D``, ``n``, ``value``, ``lchild``, ``rchild``.
        test: Iterable of mutable samples (lists); each gets one value appended.
    """
    for sample in test:
        current = tree
        while current.lchild is not None and current.rchild is not None:
            if sample[current.n] <= current.value:
                current = current.lchild
            else:
                current = current.rchild
        leaf_rows = current.D
        zero_votes = sum(1 for row in leaf_rows if row[-1] == 0)
        other_votes = len(leaf_rows) - zero_votes
        # Ties go to class 0.
        sample.append(0 if zero_votes >= other_votes else 1)

# Append a predicted label to every test row, then save all rows to pre.csv.
pre_test(tree, test)
with open('D://pre.csv', 'wt', newline='') as out_file:
    csv.writer(out_file).writerows(test)



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值