最近想要学习LDA算法,发现其中用到了EM(Expectation-Maximization,期望最大化)算法,于是仔细研究了一下,顿感数学的无限魅力。
想要学习EM算法,网上有许多参考资料,例如:
http://www.cnblogs.com/jerrylead/archive/2011/04/06/2006936.html#!comments
这篇文章写得很清楚,不过没有具体例子,直接上手难免看不懂。
推荐看李航老师的《统计学习方法》第九章,其中的例子、算法推导以及算法的扩展应用都写得很清楚,下载链接:
http://download.csdn.net/download/u014539580/10127332
以下代码仅实现了含两个高斯分量、且各分量混合系数相等(均为0.5)的高斯混合模型的参数估计;若想实现混合系数不相等的多分量高斯混合模型,可在此基础上加以修改,具体可参考书中的9.3.2节。
##python实现##
import math
#import copy
import numpy as np
import matplotlib.pyplot as plt
# Module-level switch read by ini_data and e_step to decide whether they
# print diagnostic output.
isdebug = False
# 指定k个高斯分布参数,这里指定k=2。
# 注意:2个高斯分布的标准差分别为Sigma[0]、Sigma[1],均值分别为Mu[0]、Mu[1]。
def ini_data(Sigma, Mu, k, N):
    """Generate a synthetic sample and (re)initialise the EM state.

    Draws ``N`` observations from an equally weighted mixture of ``k``
    Gaussians and stores them, together with fresh starting estimates, in
    module-level globals:

    * ``X``            -- array of shape ``(1, N)`` holding the observations.
    * ``uMu``          -- ``k`` initial mean estimates, uniform on ``[0, 5)``.
    * ``S``            -- ``k`` initial standard-deviation estimates, uniform
      on ``[0, 4)``.
    * ``Expectations`` -- zero array of shape ``(N, k)`` for the E-step output.

    Args:
        Sigma: sequence with at least ``k`` standard deviations of the
            generating components.
        Mu: sequence with at least ``k`` means of the generating components.
        k: number of mixture components.
        N: number of observations to draw.

    Raises:
        ValueError: if ``Sigma`` or ``Mu`` has fewer than ``k`` entries.
    """
    global X
    global uMu
    global Expectations
    global S

    true_sigma = np.asarray(Sigma, dtype=float)
    true_mu = np.asarray(Mu, dtype=float)
    if true_sigma.shape[0] < k or true_mu.shape[0] < k:
        raise ValueError("Sigma and Mu must each provide at least k values")

    # Size the estimates by k (not a literal 2) so that e_step/m_step can
    # index every component when k != 2.
    uMu = np.random.random(k) * 5
    S = np.random.random(k) * 4
    Expectations = np.zeros((N, k))

    # Pick the generating component of every sample uniformly among the k
    # components, then draw all samples in one vectorised call.
    component = np.random.randint(0, k, size=N)
    X = np.zeros((1, N))
    X[0, :] = np.random.normal(size=N) * true_sigma[component] + true_mu[component]

    # Dump the generated data only when debugging is switched on.
    if isdebug:
        print("***********")
        print(u"初始观測数据X:")
        print(X)
# EM算法:步骤1。计算E[zij]
def e_step(Sigma, k, N):
    """E-step: fill ``Expectations`` with per-sample component responsibilities.

    For every observation ``x_i = X[0, i]`` (``i < N``) and component
    ``j < k`` the unnormalised weight is

        w_ij = (1 / S[j]) * exp(-(x_i - uMu[j])**2 / (2 * Sigma[j]**2))

    and ``Expectations[i, j] = w_ij / sum_j' w_ij'``.  The equal mixing
    weight ``0.5`` and the ``1 / sqrt(2 * pi)`` factor are the same for every
    component, so they cancel in the ratio and are not computed.

    The ratio is evaluated in log space (the row maximum is subtracted before
    exponentiating), so a sample that is far from every mean no longer causes
    a division by zero when all weights underflow.

    NOTE(review): the exponent uses the caller-supplied ``Sigma`` while the
    normalising factor uses the current estimate ``S``.  A textbook GMM
    E-step would use the same standard deviation in both places; the existing
    numerics are kept here because the demo's convergence relies on them --
    confirm which behaviour is intended before changing it.

    Args:
        Sigma: sequence with at least ``k`` standard deviations used in the
            exponent.
        k: number of mixture components.
        N: number of observations to process.

    Raises:
        ValueError: if any of the first ``k`` entries of ``S`` is not
            strictly positive, or any of the first ``k`` entries of ``Sigma``
            is zero.
    """
    samples = np.asarray(X[0, :N], dtype=float)
    means = np.asarray(uMu[:k], dtype=float)
    est_std = np.asarray(S[:k], dtype=float)
    variance = np.asarray(Sigma[:k], dtype=float) ** 2

    if not np.all(est_std > 0):
        raise ValueError("every estimated standard deviation in S must be > 0")
    if not np.all(variance > 0):
        raise ValueError("every entry of Sigma must be non-zero")

    # log w_ij for all samples/components at once; result has shape (N, k).
    log_w = -np.log(est_std) - (samples[:, None] - means) ** 2 / (2.0 * variance)
    # Shift each row so its largest entry is 0: exp() then yields at least
    # one exact 1.0 per row and the row sum can never be zero.
    log_w -= log_w.max(axis=1, keepdims=True)
    weights = np.exp(log_w)
    Expectations[:N, :k] = weights / weights.sum(axis=1, keepdims=True)

    if isdebug:
        print("***********")
        print(u"隐藏变量E(Z):")
        print(Expectations)
# EM算法:步骤2。求使期望似然最大化的参数Mu和S
def m_step(k, N):
    """M-step: re-estimate each component's mean and standard deviation.

    For every component ``j < k`` the first ``N`` observations in ``X`` are
    weighted by ``Expectations[:, j]`` and the module-level estimates are
    updated in place:

        uMu[j] = sum_i r_ij * x_i / sum_i r_ij
        S[j]   = sqrt(sum_i r_ij * (x_i - uMu[j])**2 / sum_i r_ij)

    A component whose total responsibility is not strictly positive (zero or
    NaN) keeps its previous estimates; dividing by that total would write NaN
    into ``uMu``/``S`` and poison every later iteration.

    Args:
        k: number of mixture components.
        N: number of observations to use.
    """
    samples = X[0, :N]
    for j in range(k):
        resp = Expectations[:N, j]
        total = resp.sum()
        if not total > 0:
            # Nothing was assigned to this component: leave it untouched.
            continue
        uMu[j] = np.dot(resp, samples) / total
        # Use the freshly stored mean for the spread, as the update formula
        # requires.
        deviation = samples - uMu[j]
        S[j] = math.sqrt(np.dot(resp, deviation ** 2) / total)
# 算法迭代iter_num次,或达到精度Epsilon停止迭代
def run(Sigma, Mu, k, N, iter_num, Epsilon):
    """Generate data and run EM until convergence or ``iter_num`` iterations.

    Calls ``ini_data`` once, then alternates ``e_step`` and ``m_step``.  The
    initial means, and after every iteration the iteration index, the current
    means ``uMu`` and the current standard deviations ``S``, are printed.

    Iteration stops early once the summed absolute change of all means and
    standard deviations between two consecutive iterations drops below
    ``Epsilon``.

    Args:
        Sigma: standard deviations forwarded to ``ini_data`` and ``e_step``.
        Mu: means forwarded to ``ini_data``.
        k: number of mixture components.
        N: number of observations.
        iter_num: maximum number of EM iterations.
        Epsilon: convergence threshold on the total parameter change.
    """
    ini_data(Sigma, Mu, k, N)
    print(uMu)
    for i in range(iter_num):
        print(i)
        # Snapshot the estimates; m_step updates uMu and S in place.
        prev_mu = np.array(uMu, dtype=float)
        prev_s = np.array(S, dtype=float)
        e_step(Sigma, k, N)
        m_step(k, N)
        print(uMu)
        print(S)
        change = np.sum(np.abs(uMu - prev_mu)) + np.sum(np.abs(S - prev_s))
        if change < Epsilon:
            break
if __name__ == '__main__':
    # Demo: fit 2 components to 10000 samples generated with standard
    # deviations [6, 15] and means [48, 156], for at most 200 iterations.
    run([6,15],[48,156],2,10000,200,0.0001)
    # Show the generated observations as a 50-bin histogram.
    plt.hist(X[0,:],50)
    plt.show()