介绍
代码
#coding:utf-8
class Hmm(object):
    """Plain container for the parameters of a discrete hidden Markov model.

    The class derives from ``object`` so that the property setters below are
    honoured on Python 2 as well (old-style classes ignore them).  Every
    parameter starts out as ``None`` and is expected to be assigned by the
    caller before the model is handed to an algorithm.

    Attributes (all read/write properties):
        hidden_states: labels of the hidden states.
        observed_states: labels of the observable symbols.
        transition_matrix: state-to-state weights.
        emission_matrix: state-to-observation weights.
        start_prob: initial weight of each hidden state.
        final_prob: final weight of each hidden state.
        num_states: number of hidden states.
    """

    def __init__(self, description=None):
        self.description = description
        # Initialise every backing field so that reading a parameter that has
        # not been assigned yet yields None instead of an AttributeError on a
        # private name.
        self._hidden_states = None
        self._observed_states = None
        self._transition_matrix = None
        self._emission_matrix = None
        self._start_prob = None
        self._final_prob = None
        self._num_states = None

    @property
    def hidden_states(self):
        """Labels of the hidden states."""
        return self._hidden_states

    @hidden_states.setter
    def hidden_states(self, vals):
        self._hidden_states = vals

    @property
    def observed_states(self):
        """Labels of the observable symbols."""
        return self._observed_states

    @observed_states.setter
    def observed_states(self, vals):
        self._observed_states = vals

    @property
    def transition_matrix(self):
        """State-to-state transition weights."""
        return self._transition_matrix

    @transition_matrix.setter
    def transition_matrix(self, vals):
        self._transition_matrix = vals

    @property
    def emission_matrix(self):
        """State-to-observation emission weights."""
        return self._emission_matrix

    @emission_matrix.setter
    def emission_matrix(self, vals):
        self._emission_matrix = vals

    @property
    def start_prob(self):
        """Initial weight of each hidden state."""
        return self._start_prob

    @start_prob.setter
    def start_prob(self, val):
        self._start_prob = val

    @property
    def final_prob(self):
        """Final weight of each hidden state."""
        return self._final_prob

    @final_prob.setter
    def final_prob(self, val):
        self._final_prob = val

    @property
    def num_states(self):
        """Number of hidden states."""
        return self._num_states

    @num_states.setter
    def num_states(self, val):
        self._num_states = val
def hmmfunc_wrapper(func):
    """Decorator that prints a marker line before and after calling ``func``.

    The wrapped function receives the caller's positional and keyword
    arguments unchanged, and its return value is passed back to the caller.
    The "after" marker is printed only when ``func`` returns normally; an
    exception raised by ``func`` propagates untouched.

    Args:
        func: the callable to wrap.

    Returns:
        A wrapper with the same name and docstring as ``func``.
    """
    # Imported locally so the decorator does not depend on the module's
    # import block.
    import functools

    @functools.wraps(func)
    def inner_wrapper(*args, **kwargs):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('*** before called method=[%s]***' % func.__name__)
        # Unpack the arguments; passing ``args, kwargs`` directly would hand
        # the callee a tuple and a dict as two positional parameters.
        result = func(*args, **kwargs)
        print('*** after called method=[%s]***' % func.__name__)
        return result

    return inner_wrapper
import copy
import numpy as np
class HmmFunc(object):
    """Forward-likelihood and Viterbi decoding for a discrete HMM.

    Each algorithm receives the model as an ``hmm`` argument and reads its
    ``observed_states``, ``hidden_states``, ``start_prob``,
    ``transition_matrix``, ``emission_matrix`` and ``num_states`` attributes.
    ``transition_matrix[m][k]`` is used as the weight of moving from hidden
    state ``m`` to hidden state ``k``; ``emission_matrix[k][o]`` as the
    weight of hidden state ``k`` emitting the observation with index ``o``.
    """

    def __init__(self, description):
        self.description = description

    def _find_start_state(self, state, src_states):
        """Return the position of ``state`` in ``src_states``.

        Despite the name, this is a generic label-to-index lookup that is
        used for every observation, not only the first one.

        NOTE: a label that does not occur in ``src_states`` maps to index 0
        instead of raising; this fallback is kept for backward compatibility.
        """
        for i, val in enumerate(src_states):
            if val == state:
                return i
        return 0

    def cal_likehood(self, observerd_datas, hmm):
        """Return the likelihood of an observation sequence (forward pass).

        Args:
            observerd_datas: indexable sequence of observation labels.
            hmm: model object providing the attributes listed on the class.

        Returns:
            The sum of the forward values after the last observation, as
            returned by ``np.sum``.

        Raises:
            ValueError: if ``observerd_datas`` is empty.
        """
        if len(observerd_datas) == 0:
            raise ValueError('observerd_datas must contain at least one observation')

        num_states = hmm.num_states
        transition = hmm.transition_matrix
        emission = hmm.emission_matrix

        # Initialisation: alpha[i] = start_prob[i] * emission[i][first obs].
        first_idx = self._find_start_state(observerd_datas[0], hmm.observed_states)
        alpha = [1.0 * prob * emission[i][first_idx]
                 for i, prob in enumerate(hmm.start_prob)]

        # Recursion:
        #   alpha_t[k] = (sum_m alpha_{t-1}[m] * transition[m][k]) * emission[k][obs_t]
        for step in range(1, len(observerd_datas)):
            obs_idx = self._find_start_state(observerd_datas[step], hmm.observed_states)
            next_alpha = [0.0] * len(alpha)
            for k in range(num_states):
                reach_k = sum(alpha[m] * transition[m][k] for m in range(num_states))
                next_alpha[k] = reach_k * emission[k][obs_idx]
            alpha = next_alpha

        return np.sum(alpha)

    def cal_most_prob_seq(self, observerd_datas, hmm):
        """Return the most probable hidden-state labels (Viterbi decoding).

        Args:
            observerd_datas: sequence of observation labels (must support
                ``len``).
            hmm: model object providing the attributes listed on the class.

        Returns:
            A list with one hidden-state label per observation, in
            chronological order.

        Raises:
            ValueError: if ``observerd_datas`` is empty.
        """
        seq_len = len(observerd_datas)
        if seq_len == 0:
            raise ValueError('observerd_datas must contain at least one observation')

        num_states = hmm.num_states
        transition = hmm.transition_matrix
        emission = hmm.emission_matrix

        # Build one independent row per time step.  ``[row] * seq_len`` would
        # make every step share a single list, so writing step j would also
        # overwrite the values of the other steps.
        delta = [[1.0] * num_states for _ in range(seq_len)]
        backwards = [[0] * num_states for _ in range(seq_len)]

        for j, state in enumerate(observerd_datas):
            obs_idx = self._find_start_state(state, hmm.observed_states)
            if j == 0:
                delta[0] = [1.0 * prob * emission[i][obs_idx]
                            for i, prob in enumerate(hmm.start_prob)]
                continue
            for k in range(num_states):
                # Best predecessor of state k; ties keep the lowest index.
                best_val = float('-inf')
                best_prev = 0
                for m in range(num_states):
                    candidate = delta[j - 1][m] * transition[m][k]
                    if candidate > best_val:
                        best_val = candidate
                        best_prev = m
                delta[j][k] = best_val * emission[k][obs_idx]
                backwards[j][k] = best_prev

        idx = self._tranverse(delta, backwards, seq_len, num_states)
        return self._get_most_prob(idx, hmm.hidden_states)

    def _tranverse(self, delta, backwards, seq_len, num_states):
        """Backtrack through ``backwards`` from the best final state.

        Args:
            delta: per-step list of best path scores for each state.
            backwards: per-step list of best predecessor indices.
            seq_len: number of time steps.
            num_states: unused; kept so the signature stays unchanged.

        Returns:
            State indices ordered from the last time step to the first.
        """
        last_row = delta[seq_len - 1]
        # Index of the highest final score (first one wins on ties).
        max_id = max(range(len(last_row)), key=lambda k: last_row[k])
        ret = [max_id]
        for trs in range(seq_len - 1, 0, -1):
            max_id = backwards[trs][max_id]
            ret.append(max_id)
        return ret

    def _get_most_prob(self, idx, hidden_states):
        """Map backtracked indices (last step first) to labels in time order."""
        return [hidden_states[i] for i in reversed(idx)]

    def do_inference(self):
        """Placeholder; currently does nothing."""
        pass
if __name__ == '__main__':
    # Demo: a three-state weather model observed through four seaweed
    # conditions.
    hmm = Hmm()
    hmm.hidden_states = ['Sunny', 'Cloudy', 'Rainy']
    hmm.observed_states = ['Damp', 'Soggy', 'Dry', 'Dryish']
    hmm.num_states = 3
    hmm.start_prob = [0.63, 0.17, 0.20]
    # HmmFunc indexes this matrix as [from_state][to_state], so every row
    # must sum to 1.  The previous literal had rows summing to 1.0, 0.875 and
    # 1.175: it was the transpose of the matrix below with 0.625 mistyped as
    # 0.675.
    hmm.transition_matrix = [
        [0.5, 0.375, 0.125],
        [0.25, 0.125, 0.625],
        [0.25, 0.375, 0.375],
    ]
    # One row per hidden state, one column per entry of observed_states.
    hmm.emission_matrix = [
        [0.15, 0.05, 0.6, 0.20],
        [0.25, 0.25, 0.25, 0.25],
        [0.35, 0.5, 0.05, 0.10],
    ]
    func = HmmFunc('cal')

    # Likelihood of the observation sequence under the model.
    prob = func.cal_likehood(['Damp', 'Soggy', 'Dry'], hmm)
    print(prob)

    # Most probable hidden-state sequence for the same observations.
    most_prob = func.cal_most_prob_seq(['Damp', 'Soggy', 'Dry'], hmm)
    print(most_prob)
标注
1. 参数估计(即代码中的 do_inference)尚未实现。
参考
http://blog.csdn.net/likelet/article/details/7056068
http://blog.sina.com.cn/s/blog_953f8a550100zh35.html
<<自然语言处理综述 -第二版>>