贝叶斯决策分类器界面设计以及3D画图

使用数据利用贝叶斯决策理论设计分类决策面

模式识别作业:老师收集了全班学生的性别、身高、体重和脚长等数据,用来设计分类决策面。本文用 Python 编程实现分类决策面,数据集在这里下载。

代码比较简单,实现的是《模式识别》(第三版,张学工)第 28 页的理论。

协方差矩阵有三种情况:
1.每一类的协方差矩阵都相同,且都等于同一个对角阵(各特征互不相关且方差相等,即 Σ = σ²I)。
2.每一类的协方差矩阵都相同。
3.各类协方差矩阵不相同。
条件的限制一步步放松,所以也越来越难编写。本程序未编写第一类情况,请读者自行编写。
有任何问题请留言或发邮件18125081@bjtu.edu.cn

case2分类器图片:

在这里插入图片描述

case3分类器图片:

在这里插入图片描述
该分类器和模式识别第三版课本32页的(d)图的决策面形式相同。

代码如下

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import preprocessing as process
from sklearn.metrics import roc_curve, auc
'''
   data_process 功能:
                     输入:文件路径,
                     输出:男女的归一化样本数据矩阵
'''
def data_process(load_road):
    """Load the survey spreadsheet, clean it and split the rows by gender.

    The spreadsheet must contain the columns '序号', '年龄', '性别', '身高',
    '体重' and '脚长'.  '序号' and '年龄' are dropped; gender is encoded as
    0 for '女' (female) and 1 for '男' (male); height, weight and foot
    length are parsed to floats, converted to a common unit and then each
    scaled independently to the range [-1, 1].

    Side effect: the cleaned table is written to 'sample.csv' in the
    current working directory.

    Args:
        load_road: Path of the Excel file, passed to ``pandas.read_excel``.

    Returns:
        A ``(man, woman)`` tuple of NumPy arrays holding the cleaned rows
        whose gender code is 1 and 0 respectively.  When the spreadsheet
        has no columns beyond those listed above, each row is
        ``[gender, height, weight, foot_length]``.
    """
    X = pd.read_excel(load_road)
    del X['序号']
    del X['年龄']

    gender = [_encode_gender(value) for value in X['性别'].tolist()]
    height = [_clean_height(value) for value in X['身高'].tolist()]
    weight = [_clean_weight(value) for value in X['体重'].tolist()]
    food_length = [_clean_foot_length(value) for value in X['脚长'].tolist()]

    # feature_range must be a tuple: recent scikit-learn releases validate
    # the parameter type and reject a list.
    scaler = process.MinMaxScaler(feature_range=(-1, 1))
    X['性别'] = gender
    # fit_transform returns an (n, 1) array; flatten it before storing it
    # as a single DataFrame column.
    X['身高'] = scaler.fit_transform(np.array(height).reshape(-1, 1)).ravel()
    X['体重'] = scaler.fit_transform(np.array(weight).reshape(-1, 1)).ravel()
    X['脚长'] = scaler.fit_transform(
        np.array(food_length).reshape(-1, 1)).ravel()
    X.to_csv('sample.csv')

    # Gender code 1 is '男' (male) and 0 is '女' (female); the selection
    # below must follow that encoding so the two matrices are not swapped.
    man = X[X['性别'] == 1].values
    woman = X[X['性别'] == 0].values
    return man, woman


def _encode_gender(value):
    """Map '女' to 0 and '男' to 1; any other value is returned unchanged."""
    if value == '女':
        return 0
    if value == '男':
        return 1
    return value


def _clean_height(raw):
    """Parse a raw height cell and return it as a float."""
    text = str(raw)
    for unit in ('m', 'M', '米'):
        text = text.replace(unit, '')
    value = float(text)
    # Values above 100 are divided by 100 (presumably entered in
    # centimetres rather than metres).
    return value / 100 if value > 100 else value


def _clean_weight(raw):
    """Parse a raw weight cell and return it as a float."""
    text = str(raw)
    for unit in ('千克', 'KG', 'kg', 'Kg', 'kG'):
        text = text.replace(unit, '')
    value = float(text)
    # Real weights are assumed not to exceed 100 kg, so a value strictly
    # between 100 and 400 is halved (presumably it was entered in jin,
    # i.e. half-kilograms).
    if 100.0 < value < 400.0:
        return value / 2
    # Very large values were entered in grams.
    if value > 20000:
        return value / 1000
    return value


def _clean_foot_length(raw):
    """Parse a raw foot-length cell and return it as a float."""
    value = float(raw)
    # Values in (34, 50] are mapped with (v + 10) / 2 (presumably a shoe
    # size being converted to a length in centimetres).
    if 34 < value <= 50:
        return (value + 10) / 2.0
    # Values above 100 are divided by 10 (presumably millimetres).
    if value > 100:
        return value / 10.0
    return value
'''
   calculation 功能:
                     输入:男女的归一化样本数据矩阵
                     输出:男、女协方差矩阵,平均矩阵,男、女样本均值,男、女先验概率,概率比值
'''
def calculation(man, woman):
    """Estimate the Gaussian parameters and priors of the two classes.

    Columns 1..3 of each sample matrix are used as the feature vector.

    Args:
        man: 2-D array of samples of the first class.
        woman: 2-D array of samples of the second class.

    Returns:
        A tuple ``(man_cov, woman_cov, cov_avg, u_man, u_woman, P_man,
        P_woman, P)`` where the ``*_cov`` entries are the per-class
        covariance matrices, ``cov_avg`` is their unweighted mean,
        ``u_*`` are the per-class feature means, ``P_*`` are the class
        priors (class size / total size) and ``P`` is
        ``ln(P_woman / P_man)``.
    """
    man_features = man[:, 1:4]
    woman_features = woman[:, 1:4]

    # np.cov expects one variable per row, hence the transposes.
    man_cov = np.cov(man_features.T)
    woman_cov = np.cov(woman_features.T)
    cov_avg = (man_cov + woman_cov) / 2.0

    u_man = man_features.mean(axis=0)
    u_woman = woman_features.mean(axis=0)

    man_count = man.shape[0]
    woman_count = woman.shape[0]
    total = float(man_count + woman_count)
    P_man = float(man_count) / total
    P_woman = float(woman_count) / total
    log_prior_ratio = np.log(P_woman / P_man)

    return (man_cov, woman_cov, cov_avg, u_man, u_woman,
            P_man, P_woman, log_prior_ratio)
'''
     case2功能:
                输入:男、女的归一化样本,男、女协方差矩阵,男、女样本均值,概率比值
                输出:决策面三维坐标, 男女的决策结果
'''
def case2(man, woman, woman_cov, u_man, u_woman, P):
    """Linear discriminant for two Gaussian classes sharing one covariance.

    The discriminant is ``g(x) = w . x + w_0`` with

        w   = inv(Sigma) (u_man - u_woman)
        w_0 = -0.5 (u_man + u_woman) . w - P

    so that ``g(x) > 0`` favours the ``man`` class.

    Args:
        man: 2-D sample array of the first class; columns 1..3 are the
            features.
        woman: 2-D sample array of the second class, same layout.
        woman_cov: 3x3 covariance matrix used as the covariance shared by
            both classes (the parameter name is kept for compatibility).
        u_man: Feature mean of the first class (length-3 array).
        u_woman: Feature mean of the second class (length-3 array).
        P: Log prior ratio ``ln(P_woman / P_man)``.

    Returns:
        ``(x, y, z, 0, man_result, woman_result)``: ``x``/``y`` are a
        mesh over [-1, 1) with step 0.001 for the first two features,
        ``z`` is the third feature on the plane ``g = 0``, the literal
        ``0`` is a placeholder for the second surface that only the
        quadratic case has, and the two lists hold ``g`` for every row
        of ``man`` and ``woman``.
    """
    cov_inv = np.linalg.inv(woman_cov)
    weights = np.dot(cov_inv, u_man - u_woman)
    # The constant term needs the INVERSE covariance (already folded into
    # `weights`), and the log prior ratio ln(P_woman / P_man) enters with
    # a minus sign because g = g_man - g_woman.
    w_0 = -0.5 * np.dot(u_man + u_woman, weights) - P

    axis = np.arange(-1, 1, 0.001)
    x, y = np.meshgrid(axis, axis)
    # Solve w[0]*x + w[1]*y + w[2]*z + w_0 = 0 for z.
    z = (-w_0 - weights[1] * y - weights[0] * x) / weights[2]

    man_features = np.asarray(man)[:, 1:4].astype(float)
    woman_features = np.asarray(woman)[:, 1:4].astype(float)
    man_result = list(np.dot(man_features, weights) + w_0)
    woman_result = list(np.dot(woman_features, weights) + w_0)
    return x, y, z, 0, man_result, woman_result

'''
     case3功能:
                输入:男、女的归一化样本,男、女协方差矩阵,男、女样本均值,男、女先验概率
                输出:决策面三维坐标
'''
def case3(man, woman, man_cov, woman_cov, u_man, u_woman, P_man, P_woman):
    """Quadratic discriminant for two Gaussian classes with own covariances.

    With ``W_i = -0.5 inv(Sigma_i)``, ``w_i = inv(Sigma_i) u_i`` and
    ``w_i0 = -0.5 u_i . inv(Sigma_i) u_i - 0.5 ln|Sigma_i| + ln P_i`` the
    discriminant is

        g(v) = v . W v + w . v + w_0

    where ``W``, ``w`` and ``w_0`` are the man-minus-woman differences,
    so ``g(v) > 0`` favours the ``man`` class.

    Args:
        man: 2-D sample array of the first class; columns 1..3 are the
            features.
        woman: 2-D sample array of the second class, same layout.
        man_cov: 3x3 covariance matrix of the first class.
        woman_cov: 3x3 covariance matrix of the second class.
        u_man: Feature mean of the first class (length-3 array).
        u_woman: Feature mean of the second class (length-3 array).
        P_man: Prior probability of the first class.
        P_woman: Prior probability of the second class.

    Returns:
        ``(x, y, z1, z2, man_result, woman_result)``: ``x``/``y`` are a
        mesh over [-1, 1) with step 0.01 for the first two features,
        ``z1``/``z2`` are the two roots for the third feature on the
        surface ``g = 0`` (NaN where the discriminant of the quadratic is
        negative), and the two lists hold ``g`` for every row of ``man``
        and ``woman``.
    """
    man_inv = np.linalg.inv(man_cov)
    woman_inv = np.linalg.inv(woman_cov)

    W = -0.5 * (man_inv - woman_inv)
    w = np.dot(man_inv, u_man) - np.dot(woman_inv, u_woman)
    w_man_0 = (-0.5 * np.dot(np.dot(u_man, man_inv), u_man)
               - 0.5 * np.log(np.linalg.det(man_cov)) + np.log(P_man))
    w_woman_0 = (-0.5 * np.dot(np.dot(u_woman, woman_inv), u_woman)
                 - 0.5 * np.log(np.linalg.det(woman_cov)) + np.log(P_woman))
    wi_0 = w_man_0 - w_woman_0

    axis = np.arange(-1, 1, 0.01)
    x, y = np.meshgrid(axis, axis)

    # Write g = 0 as a*z**2 + b*z + c = 0 with v = (x, y, z).  The cross
    # terms of v.W.v pair W[0, 2] with x*z and W[1, 2] with y*z.
    a = W[2, 2]
    b = (W[0, 2] + W[2, 0]) * x + (W[1, 2] + W[2, 1]) * y + w[2]
    c = (W[0, 0] * x * x + W[1, 1] * y * y + (W[0, 1] + W[1, 0]) * x * y
         + w[0] * x + w[1] * y + wi_0)
    root = np.sqrt(b ** 2 - 4 * a * c)
    z1 = (-b - root) / (2 * a)
    z2 = (-b + root) / (2 * a)

    man_result = _quadratic_scores(man, W, w, wi_0)
    woman_result = _quadratic_scores(woman, W, w, wi_0)
    return x, y, z1, z2, man_result, woman_result


def _quadratic_scores(samples, W, w, w_0):
    """Return ``v.W.v + w.v + w_0`` for the features (columns 1..3) of each row."""
    features = np.asarray(samples)[:, 1:4].astype(float)
    quadratic = np.sum(np.dot(features, W) * features, axis=1)
    return list(quadratic + np.dot(features, w) + w_0)

# ------------------主程序-------------------------
if __name__ == "__main__":
    # Clean the survey data and estimate the per-class statistics.
    load_road = 'survey_data.xls'
    man, woman = data_process(load_road)
    man_cov, woman_cov, cov_avg, u_man, u_woman, P_man, P_woman, P = calculation(man, woman)

    # To draw the linear (shared covariance) boundary instead, replace the
    # case3 call below with:
    #   x, y, z1, z2, man_result, woman_result = case2(man, woman, cov_avg, u_man, u_woman, P)
    x, y, z1, z2, man_result, woman_result = case3(man, woman, man_cov, woman_cov, u_man, u_woman, P_man, P_woman)
    print(man_result)
    print(woman_result)

    # ---------- 3-D plot of the samples and the decision surface ----------
    fig = plt.figure()
    # Create the axes through the figure: constructing Axes3D(fig) directly
    # no longer attaches the axes to the figure in current matplotlib.
    ax1 = fig.add_subplot(111, projection='3d')
    ax1.scatter(man[:, 1], man[:, 2], man[:, 3], color='b')
    ax1.scatter(woman[:, 1], woman[:, 2], woman[:, 3], color='r')
    ax1.plot_surface(x, y, z1, color='w')
    # case2 returns the int 0 in place of a second surface; only the
    # quadratic case has a second root to draw.
    if not isinstance(z2, int):
        ax1.plot_surface(x, y, z2, color='y')
    plt.show()

  • 2
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值