参考文献:
http://www.autonlab.org/tutorials/gaussbc12.pdf
python代码如下:
import numpy as np
from sklearn.utils import array2d
from sklearn.utils.extmath import logsumexp
import random
import matplotlib.pylab as plt
class GaussianBayes:
    """Gaussian naive Bayes classifier.

    Each feature is modeled as an independent Gaussian per class; class
    priors are estimated from label frequencies in the training data.
    Fitted state: ``_classes``, ``_theta`` (per-class feature means),
    ``_sigma`` (per-class feature variances), ``_class_prior``.
    """

    def __init__(self):
        pass

    def train(self, x, y):
        """Fit per-class feature means, variances and class priors.

        Parameters
        ----------
        x : ndarray of shape (n_samples, n_features)
        y : ndarray of shape (n_samples,) or (n_samples, 1), class labels.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If x and y disagree on the number of samples.
        """
        n_samples, n_features = x.shape
        if n_samples != y.shape[0]:
            raise ValueError('x and y have incompatible shapes.')
        # Flatten y so boolean masks below index axis 0 of x correctly even
        # when callers pass a column vector of shape (n_samples, 1).
        y = np.asarray(y).ravel()
        self._classes = unique_y = np.unique(y)
        n_classes = unique_y.shape[0]
        self._theta = np.zeros((n_classes, n_features))
        self._sigma = np.zeros((n_classes, n_features))
        self._class_prior = np.zeros(n_classes)
        # Small ridge added to the variances so constant features do not
        # produce a division by zero in the likelihood.
        epsilon = 1e-9
        for i, y_i in enumerate(unique_y):
            self._theta[i, :] = np.mean(x[y == y_i, :], axis=0)
            # BUG FIX: the original omitted axis=0, collapsing all features
            # into a single scalar variance per class.
            self._sigma[i, :] = np.var(x[y == y_i, :], axis=0) + epsilon
            # float() instead of the removed np.float alias.
            self._class_prior[i] = float(np.sum(y == y_i)) / n_samples
        return self

    def predict(self, x):
        """Predict the most probable class for each row of x.

        Returns
        -------
        [indices, scores] : indices into ``self._classes`` of the argmax
        class, and the corresponding posterior probabilities.
        """
        prob = self.predict_proba(x)
        indices = []
        scores = []
        for row in prob:
            best = np.argmax(row)
            indices.append(best)
            scores.append(row[best])
        return [indices, scores]

    def predict_log_prob(self, x):
        """Return log posterior probabilities, one row per sample."""
        joint = self.joint_log_likelihood(x)
        # Numerically stable log-sum-exp over classes.
        # (sklearn.utils.extmath.logsumexp was removed from scikit-learn.)
        a_max = np.max(joint, axis=1, keepdims=True)
        log_like_x = a_max + np.log(
            np.sum(np.exp(joint - a_max), axis=1, keepdims=True))
        return joint - log_like_x

    def predict_proba(self, x):
        """Return posterior probability of each class for each sample."""
        return np.exp(self.predict_log_prob(x))

    def joint_log_likelihood(self, x):
        """Return log P(class_i) + log P(x | class_i) per sample/class.

        Output shape is (n_samples, n_classes).
        """
        # sklearn.utils.array2d no longer exists; np.atleast_2d is enough.
        x = np.atleast_2d(x)
        jll = []
        for i in range(np.size(self._classes)):
            prior = np.log(self._class_prior[i])
            # BUG FIX: the Gaussian normalization constant is
            # -0.5 * log(2*pi*sigma); the original dropped the factor 2.
            n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self._sigma[i, :]))
            n_ij -= 0.5 * np.sum(
                ((x - self._theta[i, :]) ** 2) / self._sigma[i, :], axis=1)
            jll.append(prior + n_ij)
        return np.array(jll).T
def samples(n_samples, n_features=10, classes=5, rat=0.2):
    """Generate a shuffled train/test split over `classes` shifted clusters.

    Class i draws uniform points in [i, i+1)^n_features, so clusters are
    linearly separated along every axis.

    Parameters
    ----------
    n_samples : int, total number of rows generated.
    n_features : int, feature dimension.
    classes : int, number of clusters/labels.
    rat : float, fraction of rows assigned to the test set.

    Returns
    -------
    [train_x, train_y, test_x, test_y] where the y arrays have shape (m, 1).
    """
    x = np.zeros((n_samples, n_features))
    y = np.zeros((n_samples, 1))
    per_class = n_samples // classes
    for i in range(classes):
        x[i * per_class:(i + 1) * per_class] = (
            np.random.random((per_class, n_features)) + i)
    for j in range(x.shape[0]):
        y[j, 0] = j // per_class
    # Shuffle row indices, then split by the requested ratio.
    index = np.random.permutation(x.shape[0])
    split = int((1 - rat) * x.shape[0])
    train_index = index[:split]
    # BUG FIX: the original used index[split:-1], which silently dropped
    # the last shuffled row from the test set.
    test_index = index[split:]
    train_x = x[train_index, :]
    train_y = y[train_index]
    test_x = x[test_index, :]
    test_y = y[test_index]
    return [train_x, train_y, test_x, test_y]
def plotRes(pre, real, test_x):
    """Scatter-plot predicted vs. true class assignments.

    The first two feature columns of ``test_x`` are plotted: predictions
    as thin triangles, ground truth as thick dots, one color per class.
    Supports up to five classes (the length of the color table).
    """
    colors = ['r', 'b', 'g', 'y', 'm']
    n_classes = len(set(pre))
    predicted = np.array(pre)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for c in range(n_classes):
        pred_pts = test_x[predicted == c, :]
        true_pts = test_x[real == c, :]
        ax.scatter(pred_pts[:, 0], pred_pts[:, 1],
                   color=colors[c], marker='v', linewidths=0.5)
        ax.scatter(true_pts[:, 0], true_pts[:, 1],
                   color=colors[c], marker='.', linewidths=12)
    plt.title('The prediction of the Gaussian Bayes')
    plt.legend(('c1:predict', 'c1:true',
                'c2:predict', 'c2:true',
                'c3:predict', 'c3:true',
                'c4:predict', 'c4:true',
                'c5:predict', 'c5:true'), shadow=True, loc=(0.01, 0.4))
    plt.show()
if __name__ == '__main__':
    # Build a synthetic 5-class, 30-feature dataset, fit the classifier on
    # the training split and visualize predictions on the held-out split.
    train_x, train_y, test_x, test_y = samples(2000, 30, 5)
    classifier = GaussianBayes()
    classifier.train(train_x, train_y)
    predicted = classifier.predict(test_x)
    plotRes(predicted[0], test_y.ravel(), test_x)
预测结果如下: