180116 EM算法资料整理（博客、论文、工具包、视频、书籍、代码，更新ing）

最新推荐文章于 2022-07-15 23:05:51 发布

GuokLiu

最新推荐文章于 2022-07-15 23:05:51 发布

阅读量1.3k

点赞数 3

分类专栏： Pre-Knowledges Algorithms

本文链接：https://blog.csdn.net/qq_33039859/article/details/79078116

版权

Pre-Knowledges 同时被 2 个专栏收录

42 篇文章 1 订阅

订阅专栏

Algorithms

3 篇文章 0 订阅

订阅专栏

# -*- coding: utf-8 -*-
"""
Created on Mon Jan 15 18:58:37 2018

@author: brucelau
"""

import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
plt.style.use('ggplot')
np.random.seed(1234)

np.set_printoptions(formatter={'all':lambda x: '%.3f' % x})
from IPython.display import Image
from numpy.core.umath_tests import matrix_multiply as mm
from scipy.optimize import minimize
from scipy.stats import bernoulli, binom
from scipy import stats

#%%
# Observed coin-toss sequences: one row per trial, ten tosses per row.
# Ones are summed as heads by the EM step defined later in this file.
observations = np.array([
    [1, 0, 0, 0, 1, 1, 0, 1, 0, 1],
    [1, 1, 1, 1, 0, 1, 1, 1, 1, 1],
    [1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
    [1, 0, 1, 0, 0, 0, 1, 1, 0, 0],
    [0, 1, 1, 1, 0, 1, 1, 1, 0, 1],
])

# Likelihood of seeing 5 heads in 10 tosses under each coin's initial bias
# (0.6 for coin A, 0.5 for coin B).
coin_A_pmf_observation_1 = binom.pmf(5, 10, 0.6)
coin_B_pmf_observation_1 = binom.pmf(5, 10, 0.5)

# Normalise the two likelihoods so they sum to one.
_pmf_total_observation_1 = coin_A_pmf_observation_1 + coin_B_pmf_observation_1
normalized_coin_A_pmf_observation_1 = coin_A_pmf_observation_1 / _pmf_total_observation_1
normalized_coin_B_pmf_observation_1 = coin_B_pmf_observation_1 / _pmf_total_observation_1

print("%0.1f" % normalized_coin_A_pmf_observation_1)
print("%0.1f" % normalized_coin_B_pmf_observation_1)

#%%
def em_single(priors, observations):
    """
    Run a single EM iteration for the two-coin mixture problem.

    Parameters
    ----------
    priors : sequence of two floats
        Current head-probability estimates ``[theta_A, theta_B]``.
    observations : iterable of array-like
        One entry per trial; each entry holds the 0/1 toss outcomes of that
        trial (ones are counted as heads). Entries may be NumPy arrays or
        plain Python sequences, and trials may differ in length.

    Returns
    -------
    list
        ``[new_theta_A, new_theta_B]``, the re-estimated head probabilities.
    """
    theta_A, theta_B = priors[0], priors[1]
    counts = {'A': {'H': 0, 'T': 0}, 'B': {'H': 0, 'T': 0}}

    # E step: split each trial's heads/tails between the coins in proportion
    # to how likely each coin is to have produced that trial.
    for observation in observations:
        # Coerce so that plain lists work as well as ndarray rows.
        tosses = np.asarray(observation)
        len_observation = len(tosses)
        num_heads = tosses.sum()
        num_tails = len_observation - num_heads

        # Binomial likelihood of this trial under each coin.
        contribution_A = stats.binom.pmf(num_heads, len_observation, theta_A)
        contribution_B = stats.binom.pmf(num_heads, len_observation, theta_B)

        # Responsibilities are the normalised likelihoods.
        total_contribution = contribution_A + contribution_B
        weight_A = contribution_A / total_contribution
        weight_B = contribution_B / total_contribution

        # Accumulate the expected head/tail counts attributed to each coin.
        counts['A']['H'] += weight_A * num_heads
        counts['A']['T'] += weight_A * num_tails
        counts['B']['H'] += weight_B * num_heads
        counts['B']['T'] += weight_B * num_tails

    # M step: new bias = expected heads / expected total tosses, per coin.
    new_theta_A = counts['A']['H'] / (counts['A']['H'] + counts['A']['T'])
    new_theta_B = counts['B']['H'] / (counts['B']['H'] + counts['B']['T'])
    return [new_theta_A, new_theta_B]
#%%
def em(observations, prior, tol=1e-6, iterations=10000):
    """
    Iterate EM until the parameter estimates stop changing.

    :param observations: observed toss data, passed unchanged to ``em_single``
    :param prior: initial parameter guess ``[theta_A, theta_B]``
    :param tol: convergence threshold; iteration stops once the largest
        absolute change in any parameter between successive estimates is
        below this value
    :param iterations: maximum number of parameter updates to perform
    :return: ``[parameters, n_updates]`` where ``parameters`` is the latest
        estimate (a local optimum if converged) and ``n_updates`` is the
        number of updates applied before stopping
    """
    # Fall back to the initial guess so a zero-iteration budget still
    # returns valid parameters instead of hitting an unbound local.
    new_prior = prior
    iteration = 0
    while iteration < iterations:
        new_prior = em_single(prior, observations)
        # Check every parameter, not just the first: one coin's estimate can
        # stall while the other is still moving.
        delta_change = np.max(np.abs(np.subtract(new_prior, prior)))
        print('The new_prior valuers are:',new_prior)
        if delta_change < tol:
            break
        prior = new_prior
        iteration += 1
    return [new_prior, iteration]

# Run EM from the initial guess theta_A = 0.6, theta_B = 0.5.
# em() returns [parameters, iteration_count], so despite the names theta1
# receives the parameter pair and theta2 the iteration count.
theta1, theta2 = em(observations, prior=[0.6, 0.5])
print(theta1, theta2)