# PRML：二元变量分布

### 伯努利分布

$$p(x = 1 \mid \mu) = \mu$$

$$\mathrm{Bern}(x \mid \mu) = \mu^{x} (1 - \mu)^{1 - x}$$

$$\begin{aligned}
\mathbb{E}[x] &= \mu \\
\mathrm{var}[x] &= \mu (1 - \mu)
\end{aligned}$$

### 伯努利分布的最大似然估计

$$p(\mathcal{D} \mid \mu) = \prod_{n=1}^{N} p(x_n \mid \mu) = \prod_{n=1}^{N} \mu^{x_n} (1 - \mu)^{1 - x_n}$$

$$\ln p(\mathcal{D} \mid \mu) = \sum_{n=1}^{N} \ln p(x_n \mid \mu) = \sum_{n=1}^{N} \left\{ x_n \ln \mu + (1 - x_n) \ln (1 - \mu) \right\}$$

关于 $\mu$ 最大化对数似然，我们很容易得到

$$\mu_{ML} = \frac{1}{N} \sum_{n=1}^{N} x_n$$

$$\mu_{ML} = \frac{m}{N}$$

其中 $m$ 为数据集中 $x = 1$ 的观测次数。

### 二项分布

$$\mathrm{Bin}(m \mid N, \mu) = \binom{N}{m} \mu^{m} (1 - \mu)^{N - m}$$

$$\binom{N}{m} \equiv \frac{N!}{(N - m)!\, m!}$$

$$\sum_{m=0}^{N} \binom{N}{m} \mu^{m} (1 - \mu)^{N - m} = (\mu + 1 - \mu)^{N} = 1$$

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
%matplotlib inline
from scipy.stats import binom

# Draw 10000 samples from Bin(m | N, mu) and plot a histogram of the counts m.
n = 20
mu = 0.6

X = binom.rvs(n, mu, size=10000)
fig, ax = plt.subplots()
# m takes the n + 1 integer values 0..n. Half-integer edges give every value
# its own bin, centred on the integer. With integer edges range(21) there are
# only 20 bins and the last one is closed on both sides, so m = 19 and m = 20
# were counted together.
ax.hist(X, bins=np.arange(n + 2) - 0.5, rwidth=0.7)
ax.set_xlabel("$m$", fontsize='x-large')
# Build the title from n and mu so it cannot drift from the sampled parameters.
ax.set_title(r'$N = {}, \mu={}$'.format(n, mu), fontsize='x-large')
plt.show()

$$\begin{aligned}
\mathbb{E}[m] &= N \mu \\
\mathrm{var}[m] &= N \mu (1 - \mu)
\end{aligned}$$

## beta 分布

$$\mathrm{Beta}(\mu \mid a, b) = \frac{\Gamma(a + b)}{\Gamma(a) \Gamma(b)} \mu^{a - 1} (1 - \mu)^{b - 1}$$

$$\Gamma(x) \equiv \int_0^{\infty} u^{x - 1} e^{-u} \, du$$

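$$\begin{aligned}
\Gamma(x + 1) &= \int_0^{\infty} u^{x} e^{-u} \, du = \left[ -e^{-u} u^{x} \right]_0^{\infty} + x \int_0^{\infty} u^{x - 1} e^{-u} \, du = 0 + x \Gamma(x) = x \Gamma(x) \\
\Gamma(1) &= \int_0^{\infty} e^{-u} \, du = \left[ -e^{-u} \right]_0^{\infty} = 1
\end{aligned}$$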

$$\Gamma(a) \Gamma(b) = \int_0^{\infty} x^{a - 1} e^{-x} \, dx \int_0^{\infty} y^{b - 1} e^{-y} \, dy$$

令 $t = y + x, dt = dy$，则有：

$$\Gamma(a) \Gamma(b) = \int_0^{\infty} x^{a - 1} \left\{ \int_x^{\infty} (t - x)^{b - 1} e^{-t} \, dt \right\} dx$$

$$\Gamma(a) \Gamma(b) = \int_0^{\infty} \int_0^{t} x^{a - 1} (t - x)^{b - 1} e^{-t} \, dx \, dt$$

令 $x = t\mu, dx = t\,d\mu$，则有

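$$\Gamma(a) \Gamma(b) = \int_0^{\infty} e^{-t} t^{a - 1} t^{b - 1} t \, dt \int_0^{1} \mu^{a - 1} (1 - \mu)^{b - 1} \, d\mu = \Gamma(a + b) \int_0^{1} \mu^{a - 1} (1 - \mu)^{b - 1} \, d\mu$$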

$$\int_0^{1} \mathrm{Beta}(\mu \mid a, b) \, d\mu = 1$$

$$\begin{aligned}
\mathbb{E}[\mu] &= \frac{a}{a + b} \\
\mathrm{var}[\mu] &= \frac{ab}{(a + b)^2 (a + b + 1)}
\end{aligned}$$

$$\int_0^{1} \mu^{a - 1} (1 - \mu)^{b - 1} \, d\mu = \frac{\Gamma(a) \Gamma(b)}{\Gamma(a + b)}$$

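$$\mathbb{E}[\mu] = \frac{\Gamma(a + b)}{\Gamma(a) \Gamma(b)} \int_0^{1} \mu^{a + 1 - 1} (1 - \mu)^{b - 1} \, d\mu = \frac{\Gamma(a + b)}{\Gamma(a) \Gamma(b)} \cdot \frac{\Gamma(a + 1) \Gamma(b)}{\Gamma(a + b + 1)} = \frac{a}{a + b}$$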

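$$\mathbb{E}[\mu^2] = \frac{\Gamma(a + b)}{\Gamma(a) \Gamma(b)} \int_0^{1} \mu^{a + 2 - 1} (1 - \mu)^{b - 1} \, d\mu = \frac{\Gamma(a + b)}{\Gamma(a) \Gamma(b)} \cdot \frac{\Gamma(a + 2) \Gamma(b)}{\Gamma(a + b + 2)} = \frac{a (a + 1)}{(a + b)(a + b + 1)}$$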

$a$ 和 $b$ 叫做超参数，因为它们控制参数 $\mu$ 的分布

from scipy.stats import beta

# Plot the density Beta(mu | a, b) for four (a, b) pairs, one per panel of a
# 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(10, 7))

# Flatten the 2x2 array of axes so it can be zipped with the parameter tuples.
axes = axes.flatten()

# Hyperparameter pairs, matched by position: (A[i], B[i]) is drawn on axes[i].
A = (0.1, 1, 2, 8)
B = (0.1, 1, 3, 4)

xx = np.linspace(0, 1, 100)

for a, b, ax in zip(A, B, axes):
    yy = beta.pdf(xx, a, b)
    ax.plot(xx, yy, 'r')
    ax.set_ylim(0, 3)

    ax.set_xticks([0, 0.5, 1])
    ax.set_xticklabels(["$0$", "$0.5$", "$1$"], fontsize="large")
    ax.set_yticks([0, 1, 2, 3])
    ax.set_yticklabels(["$0$", "$1$", "$2$", "$3$"], fontsize="large")
    # Raw string: "\m" is not a valid escape sequence in a normal string.
    ax.set_xlabel(r"$\mu$", fontsize="x-large")

    # Annotate each panel with the hyperparameters it shows.
    ax.text(0.1, 2.5, r"$a={}$".format(a), fontsize="x-large")
    ax.text(0.1, 2.2, r"$b={}$".format(b), fontsize="x-large")

$$p(\mu \mid m, l, a, b) \propto \mu^{m + a - 1} (1 - \mu)^{l + b - 1}$$

其中 $l = N - m$ 为数据集中 $x = 0$ 的观测次数。

$$p(\mu \mid m, l, a, b) = \mathrm{Beta}(\mu \mid a + m, b + l)$$

# One step of Bayesian updating shown as three side-by-side panels: a
# Beta(2, 2) prior, the likelihood curve y = mu, and the Beta(3, 2) posterior.
xx = np.linspace(0, 1, 100)

fig, axes = plt.subplots(1, 3, figsize=(10, 2))

axes = axes.flatten()

# The curves differ per panel; the likelihood is drawn without a format
# string, so it keeps matplotlib's default line colour.
axes[0].plot(xx, beta.pdf(xx, 2, 2), 'r')
axes[1].plot(xx, xx)
axes[2].plot(xx, beta.pdf(xx, 3, 2), 'r')

# Limits, caption, axis label and ticks are identical for all three panels.
for ax, caption in zip(axes, ("prior", "likelihood", "posterior")):
    ax.set_ylim(0, 2)
    ax.text(0.1, 1.6, caption, fontsize="x-large")
    # Raw string: "\m" is not a valid escape sequence in a normal string.
    ax.set_xlabel(r"$\mu$", fontsize="x-large")
    ax.set_xticks([0, 0.5, 1])
    ax.set_yticks([0, 1, 2])

plt.show()

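$$p(x = 1 \mid \mathcal{D}) = \int_0^{1} p(x = 1 \mid \mu) \, p(\mu \mid \mathcal{D}) \, d\mu = \int_0^{1} \mu \, p(\mu \mid \mathcal{D}) \, d\mu = \mathbb{E}[\mu \mid \mathcal{D}]$$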

$$p(x = 1 \mid \mathcal{D}) = \frac{m + a}{m + a + l + b} = \frac{m + a}{N + a + b}$$

当 $m, l \to \infty$ 时，有

$$p(x = 1 \mid \mathcal{D}) = \frac{m + a}{m + a + l + b} = \frac{m + a}{N + a + b} \to \frac{m}{N}$$

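$$\mathbb{E}_{\theta}[\theta] = \mathbb{E}_{\mathcal{D}}\left[ \mathbb{E}_{\theta}[\theta \mid \mathcal{D}] \right]$$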

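$$\mathrm{var}_{\theta}[\theta] = \mathbb{E}_{\mathcal{D}}\left[ \mathrm{var}_{\theta}[\theta \mid \mathcal{D}] \right] + \mathrm{var}_{\mathcal{D}}\left[ \mathbb{E}_{\theta}[\theta \mid \mathcal{D}] \right]$$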