HMM

Introduction

A simple Hidden Markov Model (HMM) implementation in Python: a container class for the model parameters, the forward algorithm for computing the likelihood of an observation sequence, and the Viterbi algorithm for decoding the most probable hidden-state sequence.



Code

#coding:utf-8

class Hmm:
	"""Plain container for HMM parameters: state names, transition/emission matrices and start probabilities."""
	def __init__(self, description=None):
		self.description = description

	@property
	def hidden_states(self):
		return self._hidden_states
	@hidden_states.setter
	def hidden_states(self, vals):
		self._hidden_states = vals

	@property
	def observed_states(self):
		return self._observed_states
	@observed_states.setter
	def observed_states(self, vals):
		self._observed_states = vals

	@property
	def transition_matrix(self):
		return self._transition_matrix
	@transition_matrix.setter
	def transition_matrix(self, vals):
		self._transition_matrix = vals

	@property
	def emission_matrix(self):
		return self._emission_matrix
	@emission_matrix.setter
	def emission_matrix(self, vals):
		self._emission_matrix = vals

	@property
	def start_prob(self):
		return self._start_prob
	@start_prob.setter
	def start_prob(self, val):
		self._start_prob = val

	@property
	def final_prob(self):
		return self._final_prob
	@final_prob.setter
	def final_prob(self, val):
		self._final_prob = val

	@property
	def num_states(self):
		return self._num_states
	@num_states.setter
	def num_states(self, val):
		self._num_states = val


	
	


def hmmfunc_wrapper(func):
	# Simple tracing decorator: print before and after the wrapped function runs.
	def inner_wrapper(*args, **kwargs):
		print('*** before calling method [%s] ***' % func.__name__)
		result = func(*args, **kwargs)
		print('*** after calling method [%s] ***' % func.__name__)
		return result
	return inner_wrapper
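
# Illustrative usage (not in the original code): the tracing decorator above could be
# applied to any of the HmmFunc methods defined below, e.g.
#
#   @hmmfunc_wrapper
#   def cal_likehood(self, observerd_datas, hmm):
#       ...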

import numpy as np

class HmmFunc:
	"""Forward-algorithm likelihood and Viterbi decoding over an Hmm parameter container."""
	def __init__(self, description):
		self.description = description

	def _find_start_state(self, state, src_states):
		# Return the index of a symbol in src_states (falls back to 0 if the symbol is unknown).
		for i, val in enumerate(src_states):
			if val == state:
				return i
		return 0

	def cal_likehood(self, observerd_datas, hmm):
		# Forward algorithm: likelihood of the observation sequence under the model.
		# alpha(0, j) = start_prob(j) * emission(j, obs_0)
		# alpha(t, k) = sum_m( alpha(t-1, m) * transition(m, k) ) * emission(k, obs_t)
		alpha = []
		for j, state in enumerate(observerd_datas):
			if j == 0:
				start_idx = self._find_start_state(observerd_datas[0], hmm.observed_states)
				alpha = [val * hmm.emission_matrix[i][start_idx] for i, val in enumerate(hmm.start_prob)]
				continue

			tmp = [0.0] * len(alpha)
			# Transition : N x N, Emission : N x number_of_observation_symbols
			idx = self._find_start_state(state, hmm.observed_states)
			num_states = hmm.num_states
			for k in range(num_states):
				total = 0.0
				for m in range(num_states):
					total += alpha[m] * hmm.transition_matrix[m][k]
				tmp[k] = total * hmm.emission_matrix[k][idx]
			alpha = tmp[:]
		return np.sum(alpha)

	def cal_most_prob_seq(self, observerd_datas, hmm):
		# Viterbi algorithm: most probable hidden-state sequence for the observations.
		# delta(t, k)     = max_m( delta(t-1, m) * transition(m, k) ) * emission(k, obs_t)
		# backwards(t, k) = argmax_m of the same expression (used for the backtrace)
		seq_len = len(observerd_datas)
		num_states = hmm.num_states
		# Build independent rows; "[[...]] * seq_len" would make every row share the same list.
		backwards = [[0] * num_states for _ in range(seq_len)]
		delta = [[1.0] * num_states for _ in range(seq_len)]
		for j, state in enumerate(observerd_datas):
			if j == 0:
				start_idx = self._find_start_state(observerd_datas[0], hmm.observed_states)
				delta[0] = [val * hmm.emission_matrix[i][start_idx] for i, val in enumerate(hmm.start_prob)]
				continue
			idx = self._find_start_state(state, hmm.observed_states)
			for k in range(num_states):
				max_val = -1.0
				maxid = 0
				for m in range(num_states):
					tmp = delta[j-1][m] * hmm.transition_matrix[m][k]
					if tmp > max_val:
						max_val = tmp
						maxid = m
				delta[j][k] = max_val * hmm.emission_matrix[k][idx]
				backwards[j][k] = maxid
		idx = self._tranverse(delta, backwards, seq_len, num_states)
		return self._get_most_prob(idx, hmm.hidden_states)

	def _tranverse(self, delta, backwards, seq_len, num_states):
		# Find the most probable final state, then follow the backpointers to the start.
		max_val = delta[seq_len-1][0]
		max_id = 0
		for key, val in enumerate(delta[seq_len-1]):
			if val > max_val:
				max_val = val
				max_id = key

		ret = [max_id]
		for trs in range(seq_len-1, 0, -1):
			ret.append(backwards[trs][max_id])
			max_id = backwards[trs][max_id]
		return ret

	def _get_most_prob(self, idx, hidden_states):
		# idx holds the backtraced state indices from last step to first; map to names and reverse.
		a = [hidden_states[i] for i in idx]
		a.reverse()
		return a

	def do_inference(self):
		# Parameter estimation (e.g. Baum-Welch) is not implemented; see the note below.
		pass


if __name__ == '__main__':
	hmm = Hmm()
	hmm.hidden_states = ['Sunny', 'Cloudy', 'Rainy']
	hmm.observed_states = ['Damp', 'Soggy', 'Dry', 'Dryish']
	hmm.num_states = 3
	hmm.start_prob = [0.63, 0.17, 0.20]
	# transition_matrix[m][k] = P(next state k | current state m); each row sums to 1
	hmm.transition_matrix = [[0.5, 0.375, 0.125], [0.25, 0.125, 0.625], [0.25, 0.375, 0.375]]
	# emission_matrix[k][j] = P(symbol j | state k), columns in the order of observed_states
	hmm.emission_matrix = [[0.15, 0.05, 0.6, 0.20], [0.25, 0.25, 0.25, 0.25], [0.35, 0.5, 0.05, 0.10]]
	
      
	func = HmmFunc('cal')
	# Given the transition matrix, emission matrix, start probabilities and an observed
	# sequence, calculate the probability of that sequence (forward algorithm).
	prob = func.cal_likehood(['Damp', 'Soggy', 'Dry'], hmm)
	print(prob)

	# Given the same model and observed sequence, find the most probable hidden-state
	# sequence (Viterbi decoding).
	most_prob = func.cal_most_prob_seq(['Damp', 'Soggy', 'Dry'], hmm)
	print(most_prob)
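
As a quick sanity check on the forward recursion above, the same likelihood can also be computed by brute force, summing the joint probability over every possible hidden-state path. The helper below is a minimal sketch that is not part of the original code; brute_force_likelihood is an illustrative name, and it reuses the Hmm container defined above. For short sequences its result should agree with cal_likehood.

import itertools

def brute_force_likelihood(observations, hmm):
	# Sum the joint probability P(path, observations) over every possible hidden-state path.
	num_states = hmm.num_states
	obs_idx = [hmm.observed_states.index(o) for o in observations]
	total = 0.0
	for path in itertools.product(range(num_states), repeat=len(observations)):
		p = hmm.start_prob[path[0]] * hmm.emission_matrix[path[0]][obs_idx[0]]
		for t in range(1, len(observations)):
			p *= hmm.transition_matrix[path[t-1]][path[t]] * hmm.emission_matrix[path[t]][obs_idx[t]]
		total += p
	return total

# For the example above this should agree with func.cal_likehood(['Damp', 'Soggy', 'Dry'], hmm):
#   print(brute_force_likelihood(['Damp', 'Soggy', 'Dry'], hmm))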

Notes

1. Parameter estimation (the do_inference step, i.e. learning the model parameters from data) is not implemented; a rough sketch is given below.
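
The standard way to estimate the parameters from observation data is Baum-Welch (expectation-maximization using the forward-backward algorithm). The sketch below is illustrative only, assuming a single discrete observation sequence given as column indices into the emission matrix; baum_welch and its argument names are not part of the original code. It works on plain numpy arrays rather than on the Hmm class, and it uses no scaling or log-space arithmetic, so it is only suitable for short sequences.

import numpy as np

def baum_welch(obs_idx, A, B, pi, n_iter=20):
	# One EM pass repeated n_iter times over a single observation sequence.
	A = np.array(A, dtype=float)      # N x N transition matrix
	B = np.array(B, dtype=float)      # N x M emission matrix
	pi = np.array(pi, dtype=float)    # length-N start probabilities
	obs = np.array(obs_idx)           # observation symbols as column indices into B
	N, T = A.shape[0], len(obs)
	for _ in range(n_iter):
		# E-step: forward (alpha) and backward (beta) probabilities
		alpha = np.zeros((T, N))
		alpha[0] = pi * B[:, obs[0]]
		for t in range(1, T):
			alpha[t] = alpha[t-1].dot(A) * B[:, obs[t]]
		beta = np.zeros((T, N))
		beta[T-1] = 1.0
		for t in range(T-2, -1, -1):
			beta[t] = A.dot(B[:, obs[t+1]] * beta[t+1])
		likelihood = alpha[T-1].sum()
		# gamma[t, i] = P(state_t = i | obs); xi[t, i, j] = P(state_t = i, state_t+1 = j | obs)
		gamma = alpha * beta / likelihood
		xi = np.zeros((T-1, N, N))
		for t in range(T-1):
			xi[t] = alpha[t][:, None] * A * (B[:, obs[t+1]] * beta[t+1])[None, :] / likelihood
		# M-step: re-estimate start, transition and emission probabilities
		pi = gamma[0]
		A = xi.sum(axis=0) / gamma[:-1].sum(axis=0)[:, None]
		for k in range(B.shape[1]):
			B[:, k] = gamma[obs == k].sum(axis=0)
		B = B / gamma.sum(axis=0)[:, None]
	return A, B, pi

# Illustrative usage with the toy model from the code above
# (['Damp', 'Soggy', 'Dry'] maps to column indices [0, 1, 2]):
#   A, B, pi = baum_welch([0, 1, 2], hmm.transition_matrix, hmm.emission_matrix, hmm.start_prob)

A production implementation would normalize alpha and beta at each time step (or work in log space) to avoid underflow, and would accumulate statistics over many training sequences instead of a single one.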

References

http://blog.csdn.net/likelet/article/details/7056068

http://blog.sina.com.cn/s/blog_953f8a550100zh35.html

http://www.52nlp.cn/?s=hmm

《自然语言处理综述》 (2nd edition)