朴素贝叶斯代码实现

import numpy as np

class NativeBayes:
    """Binary (labels 0/1) multinomial Naive Bayes classifier.

    Per-class feature likelihoods are estimated from summed feature
    counts with Laplace (add-one) smoothing; prediction compares the
    two class log-posteriors.  The class name is kept as-is for
    backward compatibility with existing callers.
    """

    def __init__(self):
        # log(P(Y)), Y = 0,1
        self.log_label_0_ratio = 0
        self.log_label_1_ratio = 0

        # per-feature weights applied at prediction time (None = unweighted)
        self.weights = None

        # log(P(feature | Y)) for each feature, Y = 0,1
        self.log_label_0_vec = None
        self.log_label_1_vec = None

    def fit(self, data_set, labels, weights=None):
        """Fit the Naive Bayes classifier according to data_set, labels.

        Parameters
        ----------
        data_set : array-like, shape = [n_samples, n_features]
            Training vectors of non-negative feature counts.  The input
            is converted with ``np.array``, so it must be dense.

        labels : array-like, shape = [n_samples]
            Target values.  Samples whose label equals 0 form class 0;
            every other sample is counted towards class 1.  The class
            prior is ``sum(labels) / n_samples``, so labels are expected
            to be 0 or 1.

        weights : array-like, shape = [n_features], optional
            Per-feature weights multiplied into the feature vector in
            ``predict``.  ``None`` (the default) means unweighted.

        Returns
        -------
        self : object
            Returns self.

        Notes
        -----
        If every sample carries the same label, the other class prior
        is ``log(0) = -inf`` (numpy emits a RuntimeWarning).
        """
        self.weights = weights
        data_mat = np.array(data_set)
        label_arr = np.asarray(labels).ravel()

        data_num = len(data_mat)
        label_1_ratio = label_arr.sum() / float(data_num)
        label_0_ratio = 1 - label_1_ratio

        self.log_label_1_ratio = np.log(label_1_ratio)
        self.log_label_0_ratio = np.log(label_0_ratio)

        # Laplace smoothing with alpha = 1: every feature count starts at 1,
        # so each class denominator is (total count in class) + n_features,
        # which is exactly the sum of the smoothed count vector.
        is_label_0 = label_arr == 0
        label_0_feat_array = data_mat[is_label_0].sum(axis=0) + 1.0
        label_1_feat_array = data_mat[~is_label_0].sum(axis=0) + 1.0

        self.log_label_0_vec = np.log(
            label_0_feat_array / label_0_feat_array.sum())
        self.log_label_1_vec = np.log(
            label_1_feat_array / label_1_feat_array.sum())

        return self

    def predict(self, data):
        """Predict the label of a single sample.

        Parameters
        ----------
        data : array-like, shape = [n_features]
            Testing vector, where n_features is the number of features.

        Returns
        -------
        int
            Data label, 0 or 1.  Ties are resolved in favour of 1.
        """
        data_vec = np.asarray(data)
        if self.weights is not None:
            data_vec = data_vec * np.asarray(self.weights)

        # log-posterior (up to a shared constant) of each class
        p0 = np.dot(self.log_label_0_vec, data_vec) + self.log_label_0_ratio
        p1 = np.dot(self.log_label_1_vec, data_vec) + self.log_label_1_ratio
        return 0 if p0 > p1 else 1
		
		
			


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值