目的
2022/3/18,复现博客 Gibbs sampling for Bayesian linear regression in Python,实现吉布斯采样估计线性回归参数
参考资料:
本地地址:E:\Research\OptA\MCMC\Gibbs
结果
样本点
采样结果:
频数分布直方图:
参数点估计:
beta_0 -0.640971
beta_1 1.946092
tau 1.246028
方差:
beta_0 0.245867
beta_1 0.109797
tau 0.254932
代码
#2022/3/18
#复现Gibbs sampling for Bayesian linear regression in Python
# https://kieranrcampbell.github.io/blog/2016/05/15/gibbs-sampling-bayesian-linear-regression.html
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
plt.rcParams['figure.figsize'] = (10, 5)
def sample_beta_0(y, x, beta_1, tau, mu_0, tau_0):
N = len(y)
assert len(x) == N
precision = tau_0 + tau * N
mean = tau_0 * mu_0 + tau * np.sum(y - beta_1 * x)
mean /= precision
return np.random.normal(mean, 1 / np.sqrt(precision))
def sample_beta_1(y, x, beta_0, tau, mu_1, tau_1):
N = len(y)
assert len(x) == N
precision = tau_1 + tau * np.sum(x * x)
mean = tau_1 * mu_1 + tau * np.sum( (y - beta_0) * x)
mean /= precision
return np.random.normal(mean, 1 / np.sqrt(precision))
def sample_tau(y, x, beta_0, beta_1, alpha, beta):
N = len(y)
alpha_new = alpha + N / 2
resid = y - beta_0 - beta_1 * x
beta_new = beta + np.sum(resid * resid) / 2
return np.random.gamma(alpha_new, 1 / beta_new)
beta_0_true = -1
beta_1_true = 2
tau_true = 1
N = 50
x = np.random.uniform(low = 0, high = 4, size = N)
y = np.random.normal(beta_0_true + beta_1_true * x, 1 / np.sqrt(tau_true))
synth_plot = plt.plot(x, y, "o")
plt.xlabel("x")
plt.ylabel("y")
## specify initial values
init = {"beta_0": 0,
"beta_1": 0,
"tau": 2}
## specify hyper parameters
hypers = {"mu_0": 0,
"tau_0": 1,
"mu_1": 0,
"tau_1": 1,
"alpha": 2,
"beta": 1}
def gibbs(y, x, iters, init, hypers):
assert len(y) == len(x)
beta_0 = init["beta_0"]
beta_1 = init["beta_1"]
tau = init["tau"]
trace = np.zeros((iters, 3)) ## trace to store values of beta_0, beta_1, tau
for it in range(iters):
beta_0 = sample_beta_0(y, x, beta_1, tau, hypers["mu_0"], hypers["tau_0"])
beta_1 = sample_beta_1(y, x, beta_0, tau, hypers["mu_1"], hypers["tau_1"])
tau = sample_tau(y, x, beta_0, beta_1, hypers["alpha"], hypers["beta"])
trace[it,:] = np.array((beta_0, beta_1, tau))
trace = pd.DataFrame(trace)
trace.columns = ['beta_0', 'beta_1', 'tau']
return trace
iters = 1000
trace = gibbs(y, x, iters, init, hypers)
traceplot = trace.plot()
traceplot.set_xlabel("Iteration")
traceplot.set_ylabel("Parameter value")
trace_burnt = trace[int(iters / 2) : ]
hist_plot = trace_burnt.hist(bins = 30, layout = (1,3))
print(trace_burnt.median())
print(trace_burnt.std())
plt.show()
评述
难点在于条件分布的推导,对于一般的水文模型,可能很难推导出条件分布。