Python:相对简洁的基于高斯混合模型的聚类算法(GMM)

"""
GMM clustering algorithm
By Daniel He
At CQUPT
"""
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn import datasets
import matplotlib.pyplot as plt



class GMM:
    """Gaussian mixture model clustering fitted with the EM algorithm.

    The model is initialised from a k-means partition of ``X`` and is fitted
    immediately by the constructor (``__init__`` calls :meth:`run`).

    Parameters:
        X: Array-like of shape ``(n_samples, n_features)``; converted to a
            float ``numpy`` array.
        Y: Labels supplied by the caller. They are only stored on the
            instance; the fitting code never reads them.
        K: Number of Gaussian components.
        max_iters: Upper bound on the number of EM iterations.
        tol: Stop as soon as the total log-likelihood changes by at most this
            amount between two consecutive E-steps. Pass ``None`` to always
            run ``max_iters`` iterations.
        reg_covar: Non-negative value added to the diagonal of every
            covariance estimate so that a component collapsing onto identical
            points does not yield an all-zero (singular) covariance.

    Attributes:
        mu: ``(K, n_features)`` component means.
        cov: ``(K, n_features, n_features)`` component covariances.
        weight: ``(K,)`` mixing proportions.
        gamma: ``(n_samples, K)`` responsibilities from the last E-step.
        assignments: ``(n_samples,)`` index of the most responsible component
            per sample, or ``None`` if no iteration was run.
        log_likelihood: Total log-likelihood computed by the last E-step.
        n_iter: Number of E-steps performed by :meth:`run`.

    Raises:
        ValueError: If ``X`` is not two-dimensional.
    """

    def __init__(self, X, Y, K, max_iters=100, tol=1e-6, reg_covar=1e-6):
        self.X = np.asarray(X, dtype=float)
        if self.X.ndim != 2:
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)"
            )
        self.Y = Y
        self.K = K  # number of Gaussian components
        self.max_iters = max_iters
        self.tol = tol
        self.reg_covar = reg_covar
        self.nSample, self.nDim = self.X.shape
        self.mu, self.cov, self.weight = self.Initial()
        self.gamma = np.zeros((self.nSample, self.K))
        self.assignments = None
        self.log_likelihood = -np.inf
        self.n_iter = 0
        self.run()

    def Initial(self):
        """Derive starting parameters from a k-means partition of ``X``.

        Returns:
            Tuple ``(mu, cov, weight)``: the k-means centres, the per-cluster
            sample covariances (identity for clusters with fewer than two
            members, where a sample covariance is undefined) and the
            per-cluster sample fractions.
        """
        kmeans = KMeans(n_clusters=self.K)
        assignments = kmeans.fit_predict(self.X)
        # Index everything by the cluster label itself so that means,
        # covariances and weights stay aligned even if a label is missing.
        counts = np.bincount(assignments, minlength=self.K)
        mu = np.array(kmeans.cluster_centers_, dtype=float)
        cov = np.tile(np.eye(self.nDim), (self.K, 1, 1))
        weight = counts / counts.sum()
        ridge = self.reg_covar * np.eye(self.nDim)
        for k in range(self.K):
            if counts[k] > 1:
                members = self.X[assignments == k]
                # atleast_2d: np.cov returns a 0-d array for one feature.
                cov[k] = np.atleast_2d(np.cov(members.T)) + ridge
        return mu, cov, weight

    def Expectation(self):
        """E-step: recompute the responsibilities ``self.gamma``.

        The computation is carried out on log densities so that samples far
        away from every component still receive finite responsibilities
        instead of a 0/0 division.

        Returns:
            The total log-likelihood of ``X`` under the current parameters.
        """
        # A zero mixing weight legitimately maps to log(0) = -inf.
        with np.errstate(divide="ignore"):
            log_weight = np.log(self.weight)
        log_joint = np.empty((self.nSample, self.K))
        for k in range(self.K):
            log_joint[:, k] = log_weight[k] + multivariate_normal.logpdf(
                self.X, self.mu[k], self.cov[k]
            )
        # Log-sum-exp: shifting by the row maximum makes the largest term
        # exp(0) = 1, so the normaliser can never underflow to zero.
        row_max = log_joint.max(axis=1, keepdims=True)
        shifted = np.exp(log_joint - row_max)
        norm = shifted.sum(axis=1, keepdims=True)
        self.gamma = shifted / norm
        return float(np.sum(np.log(norm) + row_max))

    def Maximization(self):
        """M-step: re-estimate ``mu``, ``cov`` and ``weight`` from ``gamma``."""
        # The tiny offset avoids dividing by zero for a component that
        # currently holds no responsibility mass.
        nk = self.gamma.sum(axis=0) + 10 * np.finfo(float).eps
        ridge = self.reg_covar * np.eye(self.nDim)
        for k in range(self.K):
            resp = self.gamma[:, k][:, np.newaxis]
            self.mu[k] = (resp * self.X).sum(axis=0) / nk[k]
            centered = self.X - self.mu[k]
            self.cov[k] = centered.T.dot(centered * resp) / nk[k] + ridge
        self.weight = nk / nk.sum()

    def run(self):
        """Alternate E- and M-steps until convergence or ``max_iters``.

        After each E-step ``assignments`` is refreshed from ``gamma``. The
        loop stops early, before the next M-step, once the log-likelihood
        change is within ``tol``, so ``gamma`` and ``assignments`` always
        correspond to the parameters stored on the instance at that point.
        """
        previous = -np.inf
        for iteration in range(self.max_iters):
            current = self.Expectation()
            self.assignments = self.gamma.argmax(axis=1)
            self.log_likelihood = current
            self.n_iter = iteration + 1
            if self.tol is not None and abs(current - previous) <= self.tol:
                break
            previous = current
            self.Maximization()


if __name__ == '__main__':
    # Alternative data set (disabled): the first two iris features.
    #   X, y = load_iris(return_X_y=True)
    #   X = X[:, 0:2]

    # Synthetic demo data: 800 two-dimensional points drawn around three
    # centres, each with standard deviation 1.
    X, y = datasets.make_blobs(
        n_samples=800,
        n_features=2,
        centers=3,
        cluster_std=[1, 1, 1],
        random_state=4,
    )
    first_feature, second_feature = X[:, 0], X[:, 1]

    # Scatter plot coloured by the generating labels.
    plt.scatter(first_feature, second_feature, c=y)
    plt.show()

    # Constructing the model also fits it; colour the same points by the
    # component each one was assigned to.
    gmm = GMM(X=X, Y=y, K=3, max_iters=100)
    predicted = gmm.assignments
    plt.scatter(first_feature, second_feature, c=predicted)
    plt.show()

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值