第1关:高斯混合聚类的核心思想
![](https://img-blog.csdnimg.cn/direct/b6178ca113fe4dad9df40f7a2b3a62b5.png)
第2关:实现高斯混合聚类
import numpy as np
from scipy.stats import multivariate_normal
import numpy as np
def multiGaussian(x, mu, sigma):
    """Evaluate the multivariate Gaussian density N(x | mu, sigma).

    :param x: sample vector with d components
    :param mu: mean vector with d components
    :param sigma: covariance matrix of shape (d, d)
    :return: the probability density of x under the Gaussian
    """
    diff = x - mu
    # The normalising constant is (2*pi)^(d/2) * |sigma|^(1/2). A bare
    # 2*pi is only correct for d == 2, so derive d from the covariance.
    n_dims = np.shape(sigma)[0]
    norm_const = np.power(2 * np.pi, n_dims / 2.0) * pow(np.linalg.det(sigma), 0.5)
    # The pseudo-inverse is used so the quadratic form can still be
    # evaluated when sigma is (numerically) singular.
    exponent = -0.5 * diff.dot(np.linalg.pinv(sigma)).dot(diff.T)
    return 1 / norm_const * np.exp(exponent)
def computeGamma(X, mu, sigma, alpha, multiGaussian):
    """Compute the posterior responsibility of every component for every sample.

    :param X: sample matrix; one row per sample
    :param mu: per-component means, indexed by component
    :param sigma: per-component covariance matrices, indexed by component
    :param alpha: mixing coefficients; its length gives the number of components
    :param multiGaussian: callable ``(x, mu, sigma) -> density`` used to score a sample
    :return: array of shape (n_samples, n_components); each row holds the
        alpha-weighted densities of one sample divided by their sum
    """
    sample_count = X.shape[0]
    component_count = len(alpha)
    gamma = np.zeros((sample_count, component_count))
    weighted = np.zeros(component_count)
    for row in range(sample_count):
        sample = X[row]
        # Un-normalised posterior: prior weight times likelihood.
        for comp in range(component_count):
            weighted[comp] = alpha[comp] * multiGaussian(sample, mu[comp], sigma[comp])
        # Normalise across components so the row sums to one.
        gamma[row] = weighted / np.sum(weighted)
    return gamma
class GMM(object):
    """Gaussian mixture clustering fitted with the EM algorithm.

    After ``fit`` the learned parameters are stored on the instance as
    ``alpha`` (mixing coefficients), ``mu`` (means) and ``sigma``
    (covariance matrices).
    """

    def __init__(self, n_components, max_iter=100):
        '''
        Constructor.
        :param n_components: number of clusters to partition the data into (int)
        :param max_iter: maximum number of EM iterations
        '''
        self.n_components = n_components
        self.ITER = max_iter

    def fit(self, train_data):
        '''
        Train the model and store the parameters in self.alpha, self.mu
        and self.sigma.
        :param train_data: training set, an ndarray with one row per sample
        :return: nothing
        '''
        # Work in floating point: with integer input the initial means would
        # inherit the integer dtype and every later update would be truncated.
        train_data = np.asarray(train_data, dtype=float)
        n_samples, n_features = train_data.shape
        # Seed the means with distinct samples whenever enough are available,
        # so two components cannot start out identical. Sampling with
        # replacement is kept only as a fallback for tiny data sets.
        seed_rows = np.random.choice(
            n_samples, self.n_components, replace=n_samples < self.n_components
        )
        mu = train_data[seed_rows]  # fancy indexing copies, train_data stays intact
        alpha = np.ones(self.n_components) / self.n_components
        sigma = np.full(
            (self.n_components, n_features, n_features),
            np.diag(np.full(n_features, 0.1)),
        )
        for _ in range(self.ITER):
            # E-step: responsibilities of every component for every sample.
            gamma = computeGamma(train_data, mu, sigma, alpha, multiGaussian)
            # M-step: re-estimate weights, means and covariances.
            cluster_mass = np.sum(gamma, axis=0)
            alpha = cluster_mass / n_samples
            for k in range(self.n_components):
                resp = gamma[:, k].reshape((n_samples, 1))
                mu[k] = np.sum(train_data * resp, axis=0) / cluster_mass[k]
                # Responsibility-weighted scatter around the updated mean.
                centered = train_data - mu[k]
                sigma[k] = (centered * resp).T.dot(centered) / cluster_mass[k]
        self.mu = mu
        self.sigma = sigma
        self.alpha = alpha

    def predict(self, test_data):
        '''
        Predict: assign each row of test_data to a cluster using the trained
        parameters.
        Note: labels lie in [0, self.n_components-1]; e.g. when
        self.n_components is 3 the possible labels are 0, 1 and 2.
        :param test_data: test set, an ndarray with one row per sample
        :return: cluster labels, an ndarray
        '''
        #********* Begin *********#
        pred = computeGamma(test_data, self.mu, self.sigma, self.alpha, multiGaussian)
        # Pick the component with the largest responsibility for each sample.
        results = np.argmax(pred, axis=1)
        return results
        #********* End *********#
第3关:图像分割
from PIL import Image
import numpy as np
from sklearn.mixture import GaussianMixture
#******** Begin *********#
# Load the picture and force three RGB channels: a grayscale, CMYK or RGBA
# file would otherwise make the (-1, 3) reshape fail or scramble the pixels.
with Image.open('./step3/image/test.jpg') as im:
    img = np.array(im.convert('RGB'))
# One row per pixel, one column per colour channel.
img_reshape = img.reshape(-1, 3)
gmm = GaussianMixture(n_components=3, covariance_type='full')
pred = gmm.fit_predict(img_reshape)
# Colour assigned to each cluster label (row index == label).
palette = np.array([[255, 255, 0], [0, 0, 255], [0, 255, 0]], dtype='uint8')
# Explicit lookup instead of writing through a reshape view of img.
img_reshape = palette[pred]
img = img_reshape.reshape(img.shape)
im = Image.fromarray(img.astype('uint8'))
im.save('./step3/dump/result.jpg')
#********* End *********#