import numpy
import pandas
def fit_naive_bayes(df_datas):
    """Estimate categorical naive Bayes probabilities from a labelled table.

    The last column of ``df_datas`` is treated as the class label (whatever
    its name is); every other column is treated as a discrete feature.
    Probabilities are plain relative frequencies (maximum likelihood, no
    smoothing).

    Args:
        df_datas: pandas DataFrame with feature columns followed by one
            label column.

    Returns:
        A tuple ``(x_names, y_values, prior, cond)`` where

        * ``x_names`` is the array of feature column names,
        * ``y_values`` is the array of distinct labels in order of first
          appearance,
        * ``prior`` maps ``"P(Y=<label>)"`` to the class prior,
        * ``cond`` maps ``"P(<feature>=<value>|Y=<label>)"`` to the
          class-conditional probability of that feature value.
    """
    x_names = df_datas.columns[:-1].values
    # Use the real name of the last column instead of assuming it is "Y".
    label_col = df_datas.columns[-1]
    y_values = df_datas[label_col].unique()
    total_cnt = len(df_datas)

    prior = {}
    cond = {}
    for y_val in y_values:
        # All samples belonging to the current class.
        class_rows = df_datas.loc[df_datas[label_col] == y_val, :]
        class_cnt = len(class_rows)
        prior[f"P(Y={y_val})"] = class_cnt / total_cnt

        for x_n in x_names:
            # One pass per feature: count every value observed in this class.
            for x_val, x_cnt in class_rows[x_n].value_counts().items():
                cond[f"P({x_n}={x_val}|Y={y_val})"] = int(x_cnt) / class_cnt
    return x_names, y_values, prior, cond


def predict_naive_bayes(test_xs, x_names, y_values, prior, cond):
    """Predict a label for every row of ``test_xs``.

    For each class the score is the prior multiplied by the conditional
    probability of every feature value; the class with the highest score
    wins. Ties go to the class that appears first in ``y_values``.

    Lookups are done through the string keys built by ``fit_naive_bayes``,
    so a feature value matches as long as its text form matches (``2`` and
    ``"2"`` are the same key). A feature value that was never observed
    together with a class contributes probability 0 for that class.

    Args:
        test_xs: sequence of samples, each holding one value per feature in
            the same order as ``x_names``.
        x_names: feature names as returned by ``fit_naive_bayes``.
        y_values: distinct class labels as returned by ``fit_naive_bayes``.
        prior: class prior dictionary.
        cond: class-conditional probability dictionary.

    Returns:
        numpy array with one predicted label per sample.

    Raises:
        ValueError: if there are no classes, or a sample does not have
            exactly one value per feature.
    """
    if len(y_values) == 0:
        raise ValueError("no class labels available for prediction")

    test_ys = []
    for test_x in test_xs:
        if len(test_x) != len(x_names):
            raise ValueError(
                f"expected {len(x_names)} feature values, got {len(test_x)}"
            )
        # Score of every class for this sample; built only from real labels.
        scores = {}
        for y_val in y_values:
            prob = prior[f"P(Y={y_val})"]
            for x_n, x_val in zip(x_names, test_x):
                # Unseen (value, class) pair -> relative frequency is 0.
                prob *= cond.get(f"P({x_n}={x_val}|Y={y_val})", 0.0)
            scores[y_val] = prob
        test_ys.append(max(scores, key=scores.get))
    return numpy.array(test_ys)


if __name__ == '__main__':
    # Training data: feature columns first, label column last.
    df_datas = pandas.read_csv(r"数据集x1,x2,y")
    x_names, y_values, prob_before_dict, prob_after_dict = fit_naive_bayes(
        df_datas
    )

    # Samples to classify; numpy stores this mixed list as strings, which
    # is fine because probabilities are looked up by their text form.
    test_xs = numpy.array(
        [
            [2, "S"],
            [3, "S"],
            [2, "L"],
        ]
    )
    test_ys = predict_naive_bayes(
        test_xs, x_names, y_values, prob_before_dict, prob_after_dict
    )
    print(test_ys)
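# 朴素贝叶斯底层 (naive Bayes implemented from scratch)
# 最新推荐文章于 2024-11-18 19:25:57 发布 (blog page footer: "latest recommended article published 2024-11-18 19:25:57")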