"""
Naive Bayes models: Gaussian, multinomial, Bernoulli, etc.

This file demonstrates the Gaussian model (hand-rolled implementation
compared against scikit-learn's GaussianNB on the iris dataset).
"""
import numpy as np
import pandas as pd
import matplotlib as plot
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import naive_bayes
import math
from collections import Counter
def create_data():
    """Load the iris dataset and return a random 70/30 train/test split.

    Returns:
        (x_train, x_test, y_train, y_test): numpy arrays; x_* hold the four
        iris features, y_* hold the class labels (0, 1, 2) as floats.
    """
    iris = datasets.load_iris()
    frame = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    frame['label'] = iris['target']
    table = np.array(frame)
    features, labels = table[:, :-1], table[:, -1]
    # train_test_split already returns (x_train, x_test, y_train, y_test)
    return train_test_split(features, labels, test_size=0.3)
class Naive_Bayes_With_Gaussian:
    """Gaussian naive Bayes classifier for 3 classes with 4 real-valued features.

    Per-class, per-feature mean and variance are estimated from the training
    data; a sample is assigned to the class with the largest product of the
    four Gaussian feature likelihoods (uniform class priors are assumed).
    """

    # Number of classes and features are fixed by the original design
    # (iris: 3 classes, 4 features).
    _NUM_CLASSES = 3
    _NUM_FEATURES = 4

    def __init__(self):
        # Fixed: the original defined ``__index__`` (a typo for ``__init__``),
        # so this attribute was never actually set on construction.
        self.model = None

    def get_mean(self, x):
        """Return the arithmetic mean of the non-empty sequence *x*."""
        return sum(x) / len(x)

    def get_variance(self, x):
        """Return the population variance (divisor ``len(x)``) of *x*."""
        avg = self.get_mean(x)
        return sum(math.pow(xi - avg, 2) for xi in x) / float(len(x))

    def gaussian_probability(self, xi, yk_mean, yk_variance):
        """Return the Gaussian pdf value of *xi* given mean and variance.

        Assumes ``yk_variance > 0`` (true whenever a class's feature values
        are not all identical).
        """
        coeff = 1 / math.sqrt(2 * math.pi * yk_variance)
        exponent = math.exp(-math.pow(xi - yk_mean, 2) / (2 * yk_variance))
        return coeff * exponent

    def get_yk_parameter_frim_trains(self, x_train, y_train):
        """Estimate per-class, per-feature (mean, variance) from training data.

        Note: the method name keeps the original's spelling ("frim") so
        existing callers are unaffected.

        Returns:
            A nested list ``params[k][j] == [mean, variance]`` for class ``k``
            in 0..2 and feature ``j`` in 0..3 — the same structure the
            original hand-expanded code produced.
        """
        # Group feature values by class: grouped[k][j] is the list of
        # feature-j values observed for class k.
        grouped = [[[] for _ in range(self._NUM_FEATURES)]
                   for _ in range(self._NUM_CLASSES)]
        for xi, yi in zip(x_train, y_train):
            k = int(yi)
            for j in range(self._NUM_FEATURES):
                grouped[k][j].append(xi[j])
        return [
            [[self.get_mean(vals), self.get_variance(vals)] for vals in feats]
            for feats in grouped
        ]

    def predict(self, x_test, x_train, y_train):
        """Fit on (x_train, y_train) and return predicted labels for x_test.

        Returns a list of ints (0, 1, or 2), one per row of ``x_test``.
        """
        params = self.get_yk_parameter_frim_trains(x_train, y_train)
        my_predict = []
        for xi in x_test:
            # Likelihood of the sample under each class's Gaussian model.
            # Fixed bug: the original computed the class-2 likelihood with
            # the class-0 parameters (results[0] instead of results[2]).
            likelihoods = []
            for k in range(self._NUM_CLASSES):
                p = 1.0
                for j in range(self._NUM_FEATURES):
                    mean, variance = params[k][j]
                    p *= self.gaussian_probability(xi[j], mean, variance)
                likelihoods.append(p)
            my_predict.append(likelihoods.index(max(likelihoods)))
        return my_predict

    def score(self, y_test, y_predict):
        """Return classification accuracy of *y_predict* against *y_test*."""
        right = sum(1 for yt, yp in zip(y_test, y_predict) if int(yt) == yp)
        return right / float(len(y_test))
if __name__ == '__main__':
    x_train, x_test, y_train, y_test = create_data()

    # Hand-rolled Gaussian naive Bayes (fits inside predict()).
    clf = Naive_Bayes_With_Gaussian()
    y_predict = clf.predict(x_test, x_train, y_train)
    print(y_predict)
    print(clf.score(y_test, y_predict))

    # scikit-learn reference implementation for comparison.
    clf = naive_bayes.GaussianNB()
    clf.fit(x_train, y_train)
    print(clf.score(x_test, y_test))