Statistical Learning Methods (统计学习方法): Hidden Markov Model (HMM) Implementation

The dataset used here is a triangle wave: an observation sequence of length 20 is used to train the model for 100 iterations, and a sequence of length 100 is then generated from the trained model. Initialization of the HMM matters a great deal; random initialization is used here.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import random

import numpy as np
import matplotlib.pyplot as plt

class HMM(object):
	def __init__(self, N, M):
		self.A = np.zeros((N, N))  # state transition probability matrix
		self.B = np.zeros((N, M))  # observation probability matrix
		self.Pi = np.array([1.0 / N] * N)  # initial state probability vector

		self.N = N  # number of possible states
		self.M = M  # number of possible observations

	def init(self):
		'''
			Randomly generate A and B, with each row summing to 1
			(Pi keeps the uniform initialization from __init__)
		'''
		for i in range(self.N):
			# 100 is an arbitrary upper bound; the random integers are only used to build normalized probabilities
			randomlist = [random.randint(0, 100) for t in range(self.N)]
			Sum = sum(randomlist)
			for j in range(self.N):
				self.A[i][j] = randomlist[j] / Sum

		for i in range(self.N):
			randomlist = [random.randint(0, 100) for t in range(self.M)]
			Sum = sum(randomlist)
			for j in range(self.M):
				self.B[i][j] = randomlist[j] / Sum

	def forward(self):
		'''
			Forward algorithm, following the definition of alpha.
			Time indices here run from 0 to T-1 (the book uses 1 to T).
		'''
		self.alpha = np.zeros((self.T, self.N))

		# Eq. (10.15)
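		# alpha_1(i) = pi_i * b_i(o_1),  i = 1, ..., N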
		for i in range(self.N):
			self.alpha[0][i] = self.Pi[i] * self.B[i][self.O[0]]

		# Eq. (10.16)
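		# alpha_{t+1}(i) = [ sum_j alpha_t(j) * a_{ji} ] * b_i(o_{t+1})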
		for t in range(1, self.T):  # t runs from 1 to T-1
			for i in range(self.N):
				total = 0
				for j in range(self.N):
					total += self.alpha[t - 1][j] * self.A[j][i]
				self.alpha[t][i] = total * self.B[i][self.O[t]]

	def backward(self):
		'''
			Backward algorithm, following the definition of beta
		'''
		self.beta = np.zeros((self.T, self.N))

		# Eq. (10.19)
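		# beta_T(i) = 1,  i = 1, ..., N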
		for i in range(self.N):
			self.beta[self.T - 1][i] = 1  # beta at the final time step is defined to be 1 for every state

		# Eq. (10.20)
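		# beta_t(i) = sum_j a_{ij} * b_j(o_{t+1}) * beta_{t+1}(j)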
		for t in range(self.T - 2, -1, -1):
			for i in range(self.N):
				for j in range(self.N):  # iterate over states q_j
					self.beta[t][i] += self.A[i][j] * self.B[j][self.O[t + 1]] * self.beta[t + 1][j]

	def cal_ksi(self, i, j, t):
		'''
			Eq. (10.26)
		'''
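		# xi_t(i, j) = alpha_t(i) * a_ij * b_j(o_{t+1}) * beta_{t+1}(j),
		# normalized by the sum of the same product over all state pairs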
		numerator = self.alpha[t][i] * self.A[i][j] * self.B[j][self.O[t + 1]] * self.beta[t + 1][j]
		denominator = 0

		for m in range(self.N):
			for n in range(self.N):
				denominator += self.alpha[t][m] * self.A[m][n] * self.B[n][self.O[t + 1]] * self.beta[t + 1][n]

		return numerator / denominator

	def cal_gama(self, i, t):
		'''
			Eq. (10.24)
		'''
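		# gamma_t(i) = alpha_t(i) * beta_t(i) / sum_j alpha_t(j) * beta_t(j)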
		numerator = self.alpha[t][i] * self.beta[t][i]
		denominator = 0

		for j in range(self.N):
			denominator += self.alpha[t][j] * self.beta[t][j]

		return numerator / denominator

	def train(self, O, MaxSteps=100):
		'''
			Estimate the HMM parameters with the Baum-Welch algorithm.
			The identities of the hidden states are not needed, only how many there are.
		'''
		self.T = len(O)  # T is the number of time steps, i.e. the length of the observation sequence
		self.O = O  # O is the observation sequence

		self.init()  # random initialization

		step = 0
		while step < MaxSteps:  # Baum-Welch iterations
			step += 1
			# print('step = ', step)
			tmp_A = np.zeros((self.N, self.N))
			tmp_B = np.zeros((self.N, self.M))
			tmp_pi = np.array([0.0] * self.N)

			self.forward()  # forward pass, fills alpha
			self.backward()  # backward pass, fills beta

			# Estimate a_ij, Eq. (10.39)
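			# a_ij = sum_{t=1}^{T-1} xi_t(i, j) / sum_{t=1}^{T-1} gamma_t(i)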
			for i in range(self.N):
				for j in range(self.N):
					numerator = 0.0
					denominator = 0.0
					for t in range(self.T - 1):
						numerator += self.cal_ksi(i, j, t)
						denominator += self.cal_gama(i, t)
					tmp_A[i][j] = numerator / denominator

			# Estimate b_j(k), Eq. (10.40)
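			# b_j(k) = sum of gamma_t(j) over t with o_t = v_k, divided by sum_{t=1}^{T} gamma_t(j)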
			for j in range(self.N):
				for k in range(self.M):
					numerator = 0.0
					denominator = 0.0
					for t in range(self.T):
						if k == self.O[t]:
							numerator += self.cal_gama(j, t)
						denominator += self.cal_gama(j, t)
					tmp_B[j][k] = numerator / denominator

			# Estimate pi_i, Eq. (10.41)
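			# pi_i = gamma_1(i)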
			for i in range(self.N):
				tmp_pi[i] = self.cal_gama(i, 0)

			self.A = tmp_A
			self.B = tmp_B
			self.Pi = tmp_pi

	def generate(self, length):
		I = []  # generated state sequence

		# first sample the initial state by roulette-wheel selection on Pi
		ran = random.randint(0, 1000) / 1000.0
		i = 0
		while self.Pi[i] < ran or self.Pi[i] < 0.0001:
			ran -= self.Pi[i]
			i += 1
		I.append(i)

		# sample the remaining states using the transition matrix A
		for i in range(1, length):
			last = I[-1]
			ran = random.randint(0, 1000) / 1000.0
			j = 0
			while self.A[last][j] < ran or self.A[last][j] < 0.0001:
				ran -= self.A[last][j]
				j += 1
			I.append(j)
		print(I)

		Y = []  # generated observation sequence
		for i in range(length):
			k = 0
			ran = random.randint(0, 1000) / 1000.0
			while self.B[I[i]][k] < ran or self.B[I[i]][k] < 0.0001:
				ran -= self.B[I[i]][k]
				k += 1
			Y.append(k)

		return Y

def triangle(length):
	'''
		Triangle wave
	'''
	X = [i for i in range(length)]
	Y = []

	for x in X:
		x = x % 6
		if x <= 3:
			Y.append(x)
		else:
			Y.append(6 - x)
	return X, Y

def show_data(x, y):
	plt.plot(x, y, 'g')
	plt.show()

if __name__ == '__main__':
	hmm = HMM(10, 4)  # 10 hidden states, 4 possible observations
	tri_x, tri_y = triangle(20)
	print(tri_x)
	print(tri_y)

	# the hidden sequence corresponds to the x-axis coordinates
	# the observation sequence corresponds to the y-axis values
	hmm.train(tri_y)
	y = hmm.generate(100)  # generate 100 observations with the trained HMM
	print(y)
	x = [i for i in range(100)]
	show_data(x, y)
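
As an aside, the roulette-wheel sampling loops in generate() can be expressed more compactly with np.random.choice. Below is a minimal sketch of that idea; the helper name sample_index is not part of the original code, and the renormalization is only a guard against floating-point drift in the learned rows.

import numpy as np

def sample_index(prob_row):
	# Draw a single index according to the given probability row.
	# The row is renormalized so that tiny floating-point errors in the
	# learned A/B/Pi rows do not trip np.random.choice's sum-to-1 check.
	p = np.asarray(prob_row, dtype=float)
	return int(np.random.choice(len(p), p=p / p.sum()))

# Hypothetical usage inside generate():
#   i0 = sample_index(hmm.Pi)        # initial state
#   o0 = sample_index(hmm.B[i0])     # first observation

Unlike the loops above, this sketch does not skip probabilities below 1e-4; whether to keep that cutoff is up to the reader.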
