1.朴素贝叶斯模型
2.朴素贝叶斯代码实现
import numpy as np
# 定义加载数据的函数
def loaddata():
    """Return the toy training set (15 samples, 2 features) and its labels.

    Feature 1 takes values {1, 2, 3}; feature 2 takes values {'S', 'M', 'L'}.
    Because the rows mix ints and strings, NumPy stores every entry as a
    string. Labels are +1 / -1.
    """
    features = [
        [1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'], [1, 'S'],
        [2, 'S'], [2, 'M'], [2, 'M'], [2, 'L'], [2, 'L'],
        [3, 'L'], [3, 'M'], [3, 'M'], [3, 'L'], [3, 'L'],
    ]
    targets = [-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1]
    return np.array(features), np.array(targets)
# 训练、计算各个概率值
def Train(trainset, train_labels):
    """Estimate naive-Bayes probabilities by maximum likelihood.

    Parameters
    ----------
    trainset : np.ndarray, shape (m, n)
        Feature matrix; entries are compared via str(), so a string-typed
        array (as produced by loaddata) works directly.
    train_labels : np.ndarray, shape (m,)
        Integer class labels.

    Returns
    -------
    prior_probability : dict
        label -> P(Y = label).
    conditional_probability : dict
        'label,feature_index,feature_value' -> P(X_j = value | Y = label).
    labels : set
        The distinct class labels.
    """
    # number of samples
    m = trainset.shape[0]
    # number of features
    n = trainset.shape[1]
    # prior: label -> count (converted to a probability at the end)
    prior_probability = {}
    # conditional counts, keyed by 'label,feature_index,feature_value'
    conditional_probability = {}
    # distinct class labels
    labels = set(train_labels)
    # per-class sample counts (prior numerators; not yet divided by m)
    for label in labels:
        prior_probability[label] = len(train_labels[train_labels == label])
    print('prior_probability =', prior_probability)
    # count co-occurrences of (label, feature index, feature value)
    for i in range(m):
        for j in range(n):
            # BUG FIX: the original read the global `y` here instead of the
            # `train_labels` parameter, so Train only worked when the caller
            # happened to define a module-level `y`.
            key = str(train_labels[i]) + ',' + str(j) + ',' + str(trainset[i][j])
            conditional_probability[key] = conditional_probability.get(key, 0) + 1
    print('conditional_probability = ', conditional_probability)
    # a dict can't be resized while iterating, so write the normalized
    # values into a fresh dict
    conditional_probability_final = {}
    for key in conditional_probability:
        # the class label is the first comma-separated field of the key
        label = key.split(',')[0]
        conditional_probability_final[key] = conditional_probability[key] / prior_probability[int(label)]
    # finalize the priors (divide the per-class counts by m)
    for label in labels:
        prior_probability[label] = prior_probability[label] / m
    return prior_probability, conditional_probability_final, labels
# 定义预测函数
def predict(data):
result = {
}
# 循环标签
for label in train_labels_set:
temp = 1.0
for j in range(len(data)):
key = str(label) + ',' + str(j) + ',' +