在《统计学习方法》中朴素贝叶斯估计的 Python 实现里,大多数小伙伴都是用 for 循环来编写的。
因为我的导师觉得读 for 循环太累了,所以我用不含 for 循环的写法重新实现了一遍。
程序如下:
import numpy as np
import pandas as pd
# Training set, one sample per row: columns 0 and 1 are the two features and
# column 2 is the class label. Mixing ints and strings in one array makes
# NumPy store every cell as a string, so all comparisons and lookups below
# use string keys such as '1', 'S' and '-1'.
train_data = np.array([[1, "S", -1],
                       [1, "M", -1],
                       [1, "M", 1],
                       [1, "S", 1],
                       [1, "S", -1],
                       [2, "S", -1],
                       [2, "M", -1],
                       [2, "M", 1],
                       [2, "L", 1],
                       [2, "L", 1],
                       [3, "L", 1],
                       [3, "M", 1],
                       [3, "M", 1],
                       [3, "L", 1],
                       [3, "L", -1]])
train_data = pd.DataFrame(train_data)

# Samples of each class (label '1' and label '-1').
train_data1 = train_data[train_data[2] == '1']
train_data2 = train_data[train_data[2] == '-1']

# Raw frequency counts: per class, and per (feature value, class) pair.
ny = train_data[2].value_counts()
nx1y_1 = train_data1.iloc[:, [0, 2]].value_counts()
nx2y_1 = train_data1.iloc[:, [1, 2]].value_counts()
nx1y_2 = train_data2.iloc[:, [0, 2]].value_counts()
nx2y_2 = train_data2.iloc[:, [1, 2]].value_counts()


def laplace_smoothed_probs(counts, feature_values, label, class_size):
    """Return Laplace-smoothed P(feature = v | class = label) for every v.

    Args:
        counts: Series of observed counts indexed by (feature value, label),
            as produced by ``DataFrame.value_counts`` on one class's rows.
        feature_values: every distinct value the feature takes in the whole
            training set (not only the values seen in this class).
        label: the class label shared by all rows behind ``counts``.
        class_size: number of training samples belonging to that class.

    Returns:
        Series indexed by (feature value, label) holding
        ``(count + 1) / (class_size + len(feature_values))``.
    """
    full_index = pd.MultiIndex.from_product(
        [feature_values, [label]], names=counts.index.names
    )
    # A value never observed together with this class has no entry in
    # ``counts``; give it an explicit zero so that it receives the smoothed
    # probability instead of being absent from the table.
    filled = counts.reindex(full_index, fill_value=0)
    return (filled + 1) / (class_size + len(feature_values))


# The smoothing denominator must use the number of values each feature can
# take over the whole training set, not the number seen inside one class.
x1_values = train_data[0].unique()
x2_values = train_data[1].unique()

# Smoothed prior: (N_y + 1) / (N + number of classes).
py = (ny + 1) / (train_data.shape[0] + ny.shape[0])
# Smoothed conditionals for each feature within each class.
px1y_1 = laplace_smoothed_probs(nx1y_1, x1_values, '1', train_data1.shape[0])
px2y_1 = laplace_smoothed_probs(nx2y_1, x2_values, '1', train_data1.shape[0])
px1y_2 = laplace_smoothed_probs(nx1y_2, x1_values, '-1', train_data2.shape[0])
px2y_2 = laplace_smoothed_probs(nx2y_2, x2_values, '-1', train_data2.shape[0])
def nbc_test():
    """Read two feature values from the console and print the predicted class.

    The score of each class is the product of its smoothed prior and the two
    smoothed conditional probabilities taken from the module-level tables;
    the class with the larger score is printed (ties go to ``y=1``).

    Raises:
        KeyError: if an entered value is not present in the probability
            tables.
    """
    x1 = input('第一个变量:')
    x2 = input('第二个变量:')
    # Selecting by feature value leaves a Series keyed by the class label;
    # each table is built from a single class, so the first row is the
    # wanted entry. ``.iloc[0]`` reads it explicitly by position instead of
    # relying on the deprecated integer-key fallback of ``Series[0]``.
    py1 = px1y_1[x1].iloc[0] * px2y_1[x2].iloc[0] * py['1']
    py2 = px1y_2[x1].iloc[0] * px2y_2[x2].iloc[0] * py['-1']
    if py1 < py2:
        print('y=-1')
    else:
        print('y=1')


nbc_test()