import numpy as np
class NativeBayes:
    """Binary multinomial Naive Bayes classifier with Laplace smoothing.

    Labels are treated as two classes: ``0`` and "anything else" (class 1).
    Feature values are accumulated as counts per class, smoothed with
    alpha = 1, and stored as log-probabilities.

    The class name is kept as-is (it is the public name callers import).
    """

    def __init__(self):
        # log(P(Y)) for Y = 0 and Y = 1
        self.log_label_0_ratio = 0
        self.log_label_1_ratio = 0
        # optional per-feature weights applied when scoring in predict()
        self.weights = None
        # log(P(feature | Y)) vectors, one entry per feature
        self.log_label_0_vec = None
        self.log_label_1_vec = None

    def fit(self, data_set, labels, weights=None):
        """Fit the Naive Bayes classifier according to data_set, labels.

        Parameters
        ----------
        data_set : array-like, shape = [n_samples, n_features]
            Dense training vectors, where n_samples is the number of samples
            and n_features is the number of features.
        labels : array-like, shape = [n_samples]
            Target values; 0 is class 0, any other value is class 1.
        weights : array-like, shape = [n_features], optional
            Weights multiplied element-wise with the feature vector when
            scoring in ``predict``. ``None`` (the default) means unweighted.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If data_set is not a non-empty 2-D array, or the number of
            labels does not match the number of samples.
        """
        data_mat = np.asarray(data_set, dtype=float)
        if data_mat.ndim != 2 or data_mat.shape[0] == 0:
            raise ValueError(
                "data_set must be a non-empty 2-D array of shape "
                "[n_samples, n_features]"
            )
        data_num = data_mat.shape[0]

        label_arr = np.asarray(labels).ravel()
        if label_arr.shape[0] != data_num:
            raise ValueError(
                "labels must contain exactly one entry per sample "
                "(got %d labels for %d samples)" % (label_arr.shape[0], data_num)
            )

        self.weights = weights

        # Same class split as the counting below: 0 vs. everything else.
        is_label_0 = label_arr == 0
        label_0_count = int(np.count_nonzero(is_label_0))
        label_1_count = data_num - label_0_count

        # A class that never occurs gets a prior of log(0) = -inf, which makes
        # predict() always choose the other class; silence the NumPy warning.
        with np.errstate(divide="ignore"):
            self.log_label_0_ratio = np.log(label_0_count / float(data_num))
            self.log_label_1_ratio = np.log(label_1_count / float(data_num))

        # Laplace smoothing with alpha = 1: every feature count starts at 1,
        # so the per-class total starts at n_features.
        label_0_feat_array = data_mat[is_label_0].sum(axis=0) + 1.0
        label_1_feat_array = data_mat[~is_label_0].sum(axis=0) + 1.0

        self.log_label_0_vec = np.log(label_0_feat_array / label_0_feat_array.sum())
        self.log_label_1_vec = np.log(label_1_feat_array / label_1_feat_array.sum())
        return self

    def _log_score(self, log_feat_vec, log_prior, data):
        """Return the (weighted) log-score of `data` for one class."""
        terms = log_feat_vec * data
        if self.weights is not None:
            terms = terms * np.asarray(self.weights, dtype=float)
        return np.sum(terms) + log_prior

    def predict(self, data):
        """Predict the label of a single testing vector.

        Must be called after ``fit``.

        Parameters
        ----------
        data : array-like, shape = [n_features]
            Testing vector, where n_features is the number of features.

        Returns
        -------
        int
            Data label, 0 or 1. Ties are resolved in favour of 1.
        """
        data = np.asarray(data, dtype=float)
        p0 = self._log_score(self.log_label_0_vec, self.log_label_0_ratio, data)
        p1 = self._log_score(self.log_label_1_vec, self.log_label_1_ratio, data)
        return 0 if p0 > p1 else 1
朴素贝叶斯代码实现
最新推荐文章于 2024-07-18 07:05:52 发布
689

被折叠的 条评论
为什么被折叠?



