import numpy as np
#根据概率probVector随机产生索引index
def generateValueBasedSingleProbability( probVector,base=None ):
'''
根据概率probVector随机产生索引index
:param probVector:概率向量
:param base:父节点索引
:return:返回子节点的索引
example1:
prior = np.array([0.2,0.4,0.4])
generateValueBasedSingleProbability(prior)
相当于P(X=0)=0.2, P(X=0)=0.4, P(X=0)=0.4
example2:
transmat = np.array([[0.5,0.2,0.3],[0.3,0.5,0.2],[0.2,0.3,0.5]])
generateValueBasedSingleProbability(transmat, 0)
相当于P( X=0 | pare(X)=0 ) = 0.5,.2, P( X=1 | pare(X)=0 ) = 0 P( X=2 | pare(X)=0 ) = 0.3
'''
ndims = probVector.ndim
if ndims==1:
length = len(probVector)
prob = np.array(probVector)
index = np.random.choice(length,p=prob)
return index
elif ndims==2:
length = len(probVector[base,:])
prob = np.array(probVector[base,:])
index = np.random.choice(length,p=prob)
return index
else:
exit(-1)
return -1
#根据HMM模型产生一组观测序列
def sample_dhmm( prior, transmat, obsmat,T):
'''
根据HMM模型产生一组观测序列
:param prior:初始概率矩阵
:param transmat:转移概率矩阵
:param obsmat:观测概率矩阵
:param T:时间长度
:return:观测序列
hideVarList:隐变量序列无需返回,可以自己打印查看隐变量
ObsVarList:观测变量序列,返回
example:
prior = np.array([0.2,0.4,0.4])
transmat = np.array([[0.5,0.2,0.3],[0.3,0.5,0.2],[0.2,0.3,0.5]])
obsmat = np.array([[0.5,0.5],[0.4,0.6],[0.7,0.3]])
data = sample_dhmm(prior, transmat, obsmat, 20)
print(data)
'''
hideVarList = list()
ObsVarList = list()
for i in range(T):
if i==0:
hideVar = generateValueBasedSingleProbability(prior)
ObsVar = generateValueBasedSingleProbability(obsmat,hideVar)
hideVarList.append(hideVar)
ObsVarList.append(ObsVar)
else:
hideVar = generateValueBasedSingleProbability(transmat,hideVarList[i-1])
ObsVar = generateValueBasedSingleProbability(obsmat,hideVar)
hideVarList.append(hideVar)
ObsVarList.append(ObsVar)
return np.array(ObsVarList),np.array(hideVarList)
#前向传播算法
def forwardAlgorithm( prior, transmat, obsmat, obsSeq ):
'''
前向传播算法
:param prior:初始概率
:param transmat:转移概率
:param obsmat:观测概率
:param obsSeq:观测序列
:return:由前向概率因子组成的array
example:
prior = np.array([0.2,0.4,0.4])
transmat = np.array([[0.5,0.2,0.3],[0.3,0.5,0.2],[0.2,0.3,0.5]])
obsmat = np.array([[0.5,0.5],[0.4,0.6],[0.7,0.3]])
print(forwardAlgorithm(prior,transmat,obsmat,[0,1,0]))
结果为:
[[0.1 0.077 0.04187 ]
[0.16 0.1104 0.035512]
[0.28 0.0606 0.052836]]
每一列都代表当前观测节点对应的隐含变量在该观测序列下取值的概率
'''
rowLength = len(prior)
colLength = len(obsSeq)
result = np.zeros((rowLength,colLength),dtype="float64")
#1.初始化
result[:,0]=prior*obsmat[:,obsSeq[0]]
#2.前向传播
i = 1
while i < colLength:
preXProb = result[:, i-1]
curXProb2Dims = preXProb.reshape(rowLength,1)*transmat
curXProb1Dims = np.sum(curXProb2Dims,0,keepdims=False)
result[:, i]=curXProb1Dims*obsmat[:, obsSeq[i]]
i=i+1
return result
#后向传播算法
def backwardAlgorithm( prior, transmat, obsmat, obsSeq ):
'''
后向传播算法
:param prior:初始概率:1维数据
:param transmat:转移概率:二维数据
:param obsmat:观测概率:二维数据
:param obsSeq:观测序列:一维数据
:return:由后向概率因子组成的array:二维数据
example:
prior = np.array([0.2,0.4,0.4])
transmat = np.array([[0.5,0.2,0.3],[0.3,0.5,0.2],[0.2,0.3,0.5]])
obsmat = np.array([[0.5,0.5],[0.4,0.6],[0.7,0.3]])
print(backwardAlgorithm(prior,transmat,obsmat,[0,1,0]))
结果为:
[[0.25 0.5 1. ]
[0.24 0.4 1. ]
[0.21 0.7 1. ]]
每一列都代表当前观测节点对应的隐含变量在该观测序列下取值的概率
'''
rowLength = len(prior)
colLength = len(obsSeq)
result = np.zeros((rowLength,colLength),dtype="float64")
#1.初始化
result[:,colLength-1]=1
#2.后向传播
i = colLength-2
transmat1Dims = np.sum(transmat, 0, keepdims=False)
while i >= 0:
postXProb = result[:, i+1]
result[:, i] = transmat1Dims*obsmat[:, obsSeq[i+1]]*result[:,i+1]
i=i-1
return result
#参数学习Baum-Welch算法(在线学习效果太差,一般进行离线学习进行统计)
#Viterbi译码算法
def viterbiDecodingAlgorithm( prior, transmat, obsmat, obsSeq ):
rowLength = len(prior)
colLength = len(obsSeq)
resultProb = np.zeros((rowLength,colLength),dtype="float64")
preXvarIndex = np.zeros((rowLength,colLength),dtype="int")
#1.初始化
resultProb[:,0]= prior*obsmat[:, obsSeq[0]]
#preXvarIndex[:,0]默认初始化为0
#2.迭代
i = 1
while i < colLength:
tmp = resultProb[:,i-1].reshape((rowLength,1))*transmat
preXvarIndex[:,i] = np.argmax(tmp,axis=0)
maxEveryCol = np.max(tmp, axis=0, keepdims=False)
resultProb[:, i] = maxEveryCol*obsmat[:, obsSeq[i]]
i=i+1
#3.回溯求得最可能的隐藏性序列
result = np.zeros((colLength,),dtype="int")
Xvar = np.argmax(resultProb[:, colLength-1])
result[colLength-1]=Xvar
i = colLength-1
while i>0:
Xvar = preXvarIndex[:,i][Xvar]
result[i - 1] = Xvar
i=i-1
return result
if __name__ == "__main__":
prior = np.array([0.2,0.4,0.4])
transmat = np.array([[0.5,0.2,0.3],[0.3,0.5,0.2],[0.2,0.3,0.5]])
obsmat = np.array([[0.5,0.5],[0.4,0.6],[0.7,0.3]])
data,hideVarList = sample_dhmm(prior, transmat, obsmat, 20)
print(data)
print(hideVarList)
print(viterbiDecodingAlgorithm(prior,transmat,obsmat,data))
print(prior)