马尔可夫矩阵,马尔可夫决策过程

def _neighbour_row(sequence, index, column_template, side, state, display):
    """Build one transition-matrix row from the neighbours at ``index``.

    Returns the empirical distribution of ``sequence[index]`` laid out in the
    column order of ``column_template``; a zero vector is returned (and a
    'loss' line printed) when ``index`` is empty.
    """
    if len(index) == 0:
        empty_row = np.zeros(len(column_template))
        print('loss ' + side + ':', state, empty_row)
        return empty_row

    neighbours = sequence[index]
    total = neighbours.shape[0]
    # Start from an all-zero row so that states never seen as a neighbour
    # still get a column.
    distribution = dict(column_template)
    # Count once per distinct neighbour instead of once per element.
    for value in dict.fromkeys(neighbours):
        distribution[value] = (neighbours == value).sum() / total
    if display:
        print(side + ':', state, distribution)
    return list(distribution.values())


def transitionprobability(A, display=False):
    """Estimate neighbour-transition matrices from an observed sequence.

    For every distinct state of ``A`` (in order of first appearance) two
    empirical distributions are computed: one over the element immediately
    before each occurrence of the state ("left") and one over the element
    immediately after it ("right").  Each distribution becomes one matrix
    row; a state that has no such neighbour gets an all-zero row and a
    'loss left:' / 'loss right:' line is printed.

    Args:
        A: One-dimensional sequence of hashable states.
        display: When true, also print every non-empty row as it is built.

    Returns:
        A dict with the keys ``'cloumn_names_left'``,
        ``'transfer_matrix_left'``, ``'cloumn_names_right'`` and
        ``'transfer_matrix_right'``.  The column names are a dict-keys view
        of the distinct states; each matrix is a square float array with one
        row and one column per distinct state.  Unlike the previous
        implementation, inputs with fewer than two elements no longer raise
        ``UnboundLocalError``.
    """
    sequence = np.array(A)
    length = sequence.shape[0]
    # A dict keeps first-occurrence order, which fixes both the row order and
    # the column order of the matrices.
    column_template = {state: 0 for state in sequence}
    n_states = len(column_template)
    rows = {'left': [], 'right': []}

    for state in column_template:
        positions = np.where(sequence == state)[0]
        neighbour_index = {
            # Occurrences at index 0 have no predecessor.
            'left': positions[positions >= 1] - 1,
            # Occurrences at the last index have no successor.
            'right': positions[positions + 1 < length] + 1,
        }
        for side, index in neighbour_index.items():
            rows[side].append(
                _neighbour_row(sequence, index, column_template, side, state, display)
            )

    shape = (n_states, n_states)
    return {
        'cloumn_names_left': column_template.keys(),
        'transfer_matrix_left': np.array(rows['left'], dtype=float).reshape(shape),
        'cloumn_names_right': column_template.keys(),
        'transfer_matrix_right': np.array(rows['right'], dtype=float).reshape(shape),
    }
# Demo call on a short token sequence. The returned dict is discarded, so the
# only visible effect is whatever the function itself prints.
transitionprobability(['我','和','你','我','你','h'])


def markov(init_array,transfer_matrix,epsilone=None,maxloop=np.inf,displiyMatrixTmp = 100000):
    """Repeatedly apply a transition matrix to a state vector.

    Each step computes ``transfer_matrix.T.dot(state)`` and compares the new
    state element-wise (exact equality) with the previous one.  The number of
    equal components is recorded as that step's "loss".

    Args:
        init_array: Initial state vector (NumPy array).
        transfer_matrix: Square transition matrix (NumPy array).
        epsilone: Stop value.  Iteration ends as soon as the number of
            unchanged components equals ``epsilone`` or the boolean
            "all components unchanged" equals ``epsilone``.  ``None`` never
            matches, so only ``maxloop`` ends the loop.
        maxloop: Upper bound on the step counter.  The loop runs while the
            counter (starting at -1) is ``<= maxloop``, i.e. up to
            ``maxloop + 2`` steps; this count is kept for backward
            compatibility.
        displiyMatrixTmp: Print a progress line whenever the step counter is
            a multiple of this value; ``0`` disables progress output.

    Returns:
        A tuple ``(states, timestep, losses)``: the array of all computed
        states (one row per step), the final step counter, and the per-step
        losses with the first step (compared against an all-ones placeholder)
        dropped.

    Raises:
        ValueError: If ``epsilone`` is ``None`` and ``maxloop`` is infinite,
            because the loop could then never terminate.
    """
    if epsilone is None and maxloop == np.inf:
        raise ValueError(
            "markov() needs a stopping criterion: pass epsilone or a finite maxloop"
        )

    # Placeholder "previous state" for the very first comparison.
    sentinel = np.ones_like(init_array)
    history = []
    previous = sentinel
    current = init_array  # iteration result
    step_matrix = transfer_matrix.T  # transition matrix, hoisted out of the loop

    timestep = -1
    lossList = []

    while timestep <= maxloop:
        timestep += 1

        current = step_matrix.dot(current)
        history.append(current)
        # Compare against the previous state directly instead of rebuilding
        # the whole history array on every step.
        convergence = (current == previous)
        previous = current
        unchanged = convergence.sum()
        lossList.append(unchanged)
        if displiyMatrixTmp and timestep % displiyMatrixTmp == 0:
            print({'time':timestep,'loss:':unchanged})
        if unchanged == epsilone or convergence.all() == epsilone:
            print("loss:",unchanged,convergence.all())
            break

    # Stack once at the end; the sentinel is included only to keep the
    # original dtype/shape promotion and is sliced off again.
    results_array = np.array([sentinel] + history)
    return results_array[1:],timestep,lossList[1:]


def mrkoff_process(List,display=False,epsilone=None,maxloop=1000,displiyMatrixTmp=1000,direction="right"):
    """Build a transition matrix from a sequence and iterate it with ``markov``.

    Args:
        List: Observed sequence of states, passed to ``transitionprobability``.
        display: Forwarded to ``transitionprobability``.
        epsilone: Forwarded to ``markov`` as its stop value.
        maxloop: Forwarded to ``markov`` as its step bound.
        displiyMatrixTmp: Forwarded to ``markov`` as its progress interval.
        direction: ``'left'`` to use the predecessor matrix or ``'right'`` to
            use the successor matrix.

    Returns:
        The three values returned by ``markov`` followed by the column names
        of the chosen matrix, as a 4-tuple.

    Raises:
        ValueError: If ``direction`` is neither ``'left'`` nor ``'right'``.
    """
    if direction not in ('left', 'right'):
        raise ValueError(
            "direction must be 'left' or 'right', got {!r}".format(direction)
        )

    transfer = transitionprobability(List,display=display)
    # Pick the matrix that matches the requested direction; previously the
    # 'right' branch read the left-hand entries by mistake.
    matrix = transfer['transfer_matrix_' + direction]
    columns = transfer['cloumn_names_' + direction]
    print(direction,pd.DataFrame(matrix,columns=columns))

    # Start from the uniform distribution over all states.
    n_states = matrix.shape[0]
    init_array = np.ones(n_states)/n_states
    chain = markov(init_array,matrix,epsilone,maxloop,displiyMatrixTmp)
    return tuple(list(chain)+[columns])


# Sample observation sequence, one state per character.
S = list("吃睡打吃打睡吃睡")

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值