机器学习-贝叶斯模型-西瓜书代码(Bayes)-拉普拉斯修正

以下代码是本人在学习西瓜书时花费两个礼拜根据原理进行原创,若需转载请咨询本人,谢谢!

自我研究模拟代码

附上离散类别截图

数据截图:

运行截图:

Bayes_config.py
"""
 Filename: Bayes_config
 Author: kdd_zyx
 Description: 机器学习 - 朴素贝叶斯
 Datas:kdd - 随机划分
 Start: 2018.10.11
 End:
"""

import random as r
from functools import reduce

text_tnum = 10545       # upper slice bound on the rows taken from the data file (dataset[start:text_tnum])
text_pret = 0.255       # fraction of the data rows held out as the test set (row 0 is excluded from the count)

start = 1   # index of the first data row; row 0 is handled separately
end = -1    # index of the last element of a row / list

def divi(x, y):
    """Return ``x / y``, or 0 when ``y`` equals zero instead of raising."""
    return 0 if y == 0 else x / y

def radf(text_addr, text_splt):
    """Read a delimited text file into a list of rows.

    Args:
        text_addr: Path of the file to read; it is opened as UTF-8 text.
        text_splt: Separator string used to split each line into fields.

    Returns:
        A list with one entry per line of the file.  Each entry is the list
        of fields obtained by removing newline characters from the line and
        splitting it on ``text_splt``.
    """
    # The ``with`` block closes the file on exit, so no explicit close() call
    # is needed.
    with open(text_addr, 'r', encoding='UTF-8') as f:
        return [line.replace('\n', '').split(text_splt) for line in f]

# 引入训练集、验证集、测试集
def Lead_dataset():
    """Load ``Car_Data.txt`` and return row 0 followed by shuffled data rows.

    The file is read with ``radf`` using ``','`` as the separator.  Row 0 is
    kept in first position; the rows in ``dataset[start:text_tnum]`` are
    shuffled in random order and appended after it.

    Returns:
        A list whose first element is row 0 of the file and whose remaining
        elements are the selected data rows in shuffled order.

    Raises:
        OSError: If the data file cannot be opened or read.
        UnicodeDecodeError: If the data file is not valid UTF-8.
        ValueError: If the data file contains no rows.
    """
    try:
        dataset = radf('Car_Data.txt', ',')
    except (OSError, UnicodeDecodeError) as e:
        # Retrying would read the same file again and fail the same way, so
        # report the problem and let the caller see the original exception.
        print('Error:', e)
        raise

    if not dataset:
        raise ValueError('Car_Data.txt contains no rows')

    pro_dataset = dataset[0]
    txt_dataset = dataset[start:text_tnum]
    r.shuffle(txt_dataset)

    return [pro_dataset] + txt_dataset
Bayes.py

"""
 Filename: Bayes
 Author: kdd_zyx
 Description: 机器学习 - 朴素贝叶斯
 Datas:kdd - 随机划分
 Start: 2018.10.11
 End:
"""

from Bayes_config import *

class Bayes(object):
    """Naive Bayes classifier for discrete attributes with Laplacian correction.

    Every data row is a list of attribute values whose element at index
    ``end`` is the class label.  ``Main_data`` loads the data, splits it into
    a training part and a test part, and prints the prediction results.
    """

    def __init__(self):
        """Create a classifier with empty datasets; ``Main_data`` fills them."""
        self.tra_dataset = []   # training rows
        self.tex_dataset = []   # test rows
        self.pro_dataset = []   # row 0 of the loaded data
        self.prd_dataset = []   # result of Lead_attrest()
        self.tra_num = 0        # number of training rows
        self.tex_num = 0        # number of test rows

    def Lead_attrest(self):
        """Summarise the value domains found in the training set.

        Returns:
            A list with one entry per column of the training rows.  Each
            entry is the number of distinct values seen in that column,
            except the entry at index ``end``, which is the sorted list of
            distinct class labels.
        """
        columns = [set() for _ in self.tra_dataset[0]]

        for data in self.tra_dataset:
            for num, value in enumerate(data):
                columns[num].add(value)

        new_data = [len(values) for values in columns]

        # Class labels are sorted so that class indices (and therefore tie
        # breaking between equally likely classes) are the same on every run.
        new_data[end] = sorted(columns[end])

        return new_data

    def Main_data(self):
        """Load the data, split it into training/test sets and run ``Forecast``.

        The last ``int(n * text_pret + 1)`` data rows (``n`` being the number
        of rows after index ``start``) form the test set; the rows before
        them, starting at ``start``, form the training set.  Nothing is
        evaluated when ``text_pret`` is 0.

        Raises:
            ValueError: If no data was loaded or the split leaves no training
                rows.
        """
        dataset = Lead_dataset()

        if text_pret == 0:
            # No test split requested, so there is nothing to evaluate.
            return

        if not dataset:
            raise ValueError('no data rows were loaded')

        self.tex_num = int(len(dataset[start:]) * text_pret + 1)
        self.tra_dataset = dataset[start:-self.tex_num]
        self.tex_dataset = dataset[-self.tex_num:]
        self.tra_num = len(self.tra_dataset)

        if not self.tra_dataset:
            raise ValueError('not enough data rows to build a training set')

        self.pro_dataset = dataset[0]
        self.prd_dataset = self.Lead_attrest()

        self.Forecast(self.tra_dataset, self.tex_dataset)

    def Forecast(self, tra_dataset, tex_dataset):
        """Predict every test row and print per-row results and the accuracy.

        Args:
            tra_dataset: Training rows used to build the per-class groups.
            tex_dataset: Rows to classify; the element at index ``end`` of
                each row is compared with the predicted label.
        """
        verify_num = 0
        end_sort = self.Cata_sort(tra_dataset)
        labels = self.prd_dataset[end]

        for data in tex_dataset:
            predicted = labels[self.Foresort(data, end_sort)]
            print(data[end], predicted)

            if data[end] == predicted:
                verify_num += 1

        accury = round(divi(verify_num, self.tex_num), 3)
        print()
        print('Training dataset number:', self.tra_num)
        print('Texting  dataset number:', self.tex_num)
        print('The correct number:', verify_num)
        print('Accuracy:', accury)

    def Foresort(self, data, end_sort):
        """Return the index of the most probable class for one row.

        Args:
            data: Row to classify; its last element is not used as an
                attribute.
            end_sort: Training rows grouped by class, as returned by
                ``Cata_sort``.

        Returns:
            The index (into ``end_sort``) of the class with the highest
            score; the first such index when several classes tie.
        """
        attr_total = len(data) - 1
        Pc = []

        for members in end_sort:
            sortn = len(members)
            sortP = []
            for cta_num in range(attr_total):
                matches = sum(
                    1 for sort in members if data[cta_num] == sort[cta_num]
                )
                # Laplacian correction of the conditional probability:
                # (matches + 1) / (class size + distinct values of attribute).
                sortP.append(
                    divi(matches + 1, sortn + self.prd_dataset[cta_num])
                )

            Pc.append(self.Laplacian(sortn, sortP))

        return Pc.index(max(Pc))

    def Laplacian(self, sortn, sortP):
        """Combine the corrected class prior with the attribute likelihoods.

        Args:
            sortn: Number of training rows belonging to the class.
            sortP: Corrected conditional probabilities, one per attribute.

        Returns:
            The corrected prior ``(sortn + 1) / (|D| + N)`` multiplied by the
            product of ``sortP``, where ``|D|`` is the number of training
            rows and ``N`` the number of classes.
        """
        class_total = len(self.prd_dataset[end])
        new_clas = divi(sortn + 1, len(self.tra_dataset) + class_total)

        # The initial value 1 keeps the product defined when sortP is empty.
        return new_clas * reduce(lambda x, y: x * y, sortP, 1)

    def Cata_sort(self, tra_dataset):
        """Group training rows by class label.

        Args:
            tra_dataset: Rows whose element at index ``end`` is a class label
                present in ``self.prd_dataset[end]``.

        Returns:
            A list of row lists, ordered like ``self.prd_dataset[end]``.
        """
        labels = self.prd_dataset[end]
        sort = [[] for _ in labels]

        for data in tra_dataset:
            sort[labels.index(data[end])].append(data)

        return sort

if __name__ == '__main__':
    # Script entry point: build the classifier and run the load/split/predict cycle.
    Bayes().Main_data()
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值