- 蒙特卡洛技术
在这个问题上,我们将用蒙特卡洛技术做一些练习。
a) 从高斯分布中抽取一个N=200个随机数的样本,μ=0,σ=2.5。请使用非参数自举法(我的幻灯片第36页)来创建一个有N=200个随机数的模拟样本。将原始样本和模拟样本的直方图绘制在同一图中。计算它们的平均值和标准偏差。
b) 模拟样本与原始样本在统计学上是否一致?请设计一个统计学测试来回答这个问题。
c) 重复上述分析,但对N=20。
import numpy as np
import matplotlib.pyplot as plt
mu = 0
sigma = 2.5
N = 200
M = 100#蒙特卡罗实验次数
#Monte_Carlo,3种情况下的Monte_Carlo实验:1.改变mu,计算均值与标准差的RMSE,2.改变sigam,3.改变N
#以此观察在不同情况下,模拟抽样与原始抽样之间统计量(对于高斯分布即为:均值(mu),标准差(sigma))在M次蒙特卡罗实验下的均方根误差(RMSE)
#RMSE越小代表两个抽样之间的统计量越接近。RMSE小于一定值时,可以认为两个样本序列在统计学上是一致的!
mu_iteration = np.arange(0,5,0.2)#终点值为0,起点值为5,步长为0.2
sigma_iteration = np.arange(1,5,0.2)
N_iteration = np.arange(20,1000,20)
def Boostrap(Original_samples,N):
Boostrap__samples = []
for i in range(N):
Boostrap__samples.append(np.random.choice(Original_samples, size=1))
return np.array(Boostrap__samples)
def Plot_hist(Original_samples,Boostrap__samples):
plt.figure()
plt.hist(x=Original_samples, # 绘图数据
bins=100, # 指定直方图的条形数为100个
edgecolor='w', # 指定直方图的边框色
color=['c'], # 指定直方图的填充色
label=['Original_samples'], # 为直方图呈现图例
density=False, # 是否将纵轴设置为密度,即频率
alpha=0.6, # 透明度
rwidth=1, # 直方图宽度百分比:0-1
stacked=False) # 当有多个数据时,是否需要将直方图呈堆叠摆放,默认水平摆放
plt.hist(x=Boostrap__samples, # 绘图数据
bins=100, # 指定直方图的条形数为100个
edgecolor='w', # 指定直方图的边框色
color=['r'], # 指定直方图的填充色
label=['Boostrap__samples'], # 为直方图呈现图例
density=False, # 是否将纵轴设置为密度,即频率
alpha=0.6, # 透明度
rwidth=1, # 直方图宽度百分比:0-1
stacked=False) # 当有多个数据时,是否需要将直方图呈堆叠摆放,默认水平摆放
ax = plt.gca() # 获取当前子图
ax.spines['right'].set_color('none') # 右边框设置无色
ax.spines['top'].set_color('none') # 上边框设置无色
# 显示图例
plt.legend()
#
plt.xlabel("Value")
plt.ylabel("frequency")
# 显示图形
plt.show()
def Statistics(Original_samples,Boostrap__samples):
Original_samples_mean = np.mean(Original_samples)
Original_samples_std = np.std(Original_samples)
Boostrap__samples_mean = np.mean(Boostrap__samples)
Boostrap__samples_std = np.std(Boostrap__samples)
print('Original_samples_mean:', Original_samples_mean)
print('Original_samples_std:', Original_samples_std)
print('Boostrap__samples_mean:', Boostrap__samples_mean)
print('Boostrap__samples_std:', Boostrap__samples_std)
return Original_samples_mean,Original_samples_std,Boostrap__samples_mean,Boostrap__samples_std
def RMSE_calculate(M,mu,sigma,N):
MSE_mean = 0
MSE_std = 0
for i in range(M):
Original_samples = np.random.normal(mu, sigma, N)
Boostrap__samples = Boostrap(Original_samples, N)
Original_mean = np.mean(Original_samples)
Original_std = np.mean(Original_samples)
Boostrap_mean = np.mean(Boostrap__samples)
Boostrap_std = np.mean(Boostrap__samples)
MSE_mean += np.square(Boostrap_mean-Original_mean)
MSE_std += np.square(Boostrap_std - Original_std)
RMSE_mean = np.sqrt(MSE_mean / M)
RMSE_std = np.sqrt(MSE_std / M)
return RMSE_mean,RMSE_std
def Monte_Carlo(M,mu,sigma,N):
RMSE_mu_mean = []
RMSE_mu_std = []
RMSE_sigma_mean = []
RMSE_sigma_std = []
RMSE_N_mean = []
RMSE_N_std = []
for mu_item in mu:
RMSE_mean_temp,RMSE_std_temp = RMSE_calculate(M, mu_item, 0.25, 200)
RMSE_mu_mean.append(RMSE_mean_temp)
RMSE_mu_std.append(RMSE_std_temp)
for sigma_item in sigma:
RMSE_mean_temp,RMSE_std_temp = RMSE_calculate(M, 0, sigma_item, 200)
RMSE_sigma_mean.append(RMSE_mean_temp)
RMSE_sigma_std.append(RMSE_std_temp)
for N_item in N:
RMSE_mean_temp, RMSE_std_temp = RMSE_calculate(M, 0, 0.25, N_item)
RMSE_N_mean.append(RMSE_mean_temp)
RMSE_N_std.append(RMSE_std_temp)
return np.array(RMSE_mu_mean),np.array(RMSE_mu_std),np.array(RMSE_sigma_mean),np.array(RMSE_sigma_std),\
np.array(RMSE_N_mean),np.array(RMSE_N_std)
def Plot_Monte_Carlo_Rsult(RMSE_mu_mean,RMSE_mu_std,RMSE_sigma_mean,RMSE_sigma_std,RMSE_N_mean,RMSE_N_std):
plt.figure()
plt.plot(mu_iteration,RMSE_mu_mean)
plt.title('The mean of the Monte Carlo experiment varies with mu')
plt.ylabel('RMSE')
plt.xlabel('mu')
plt.grid()
plt.show()
plt.figure()
plt.plot(mu_iteration,RMSE_mu_std)
plt.title('The standard deviation of the Monte Carlo experiment varies with mu')
plt.ylabel('RMSE')
plt.xlabel('mu')
plt.grid()
plt.show()
plt.figure()
plt.plot(sigma_iteration,RMSE_sigma_mean)
plt.title('The mean of the Monte Carlo experiment varies with sigma')
plt.ylabel('RMSE')
plt.xlabel('sigma')
plt.grid()
plt.show()
plt.figure()
plt.plot(sigma_iteration,RMSE_sigma_std)
plt.title('The standard deviation of the Monte Carlo experiment varies with sigma')
plt.ylabel('RMSE')
plt.xlabel('sigma')
plt.grid()
plt.show()
plt.figure()
plt.plot(N_iteration,RMSE_N_mean)
plt.title('The mean of the Monte Carlo experiment varies with N')
plt.ylabel('RMSE')
plt.xlabel('N')
plt.grid()
plt.show()
plt.figure()
plt.plot(N_iteration,RMSE_N_std)
plt.title('The standard deviation of the Monte Carlo experiment varies with N')
plt.ylabel('RMSE')
plt.xlabel('N')
plt.grid()
plt.show()
if __name__ == '__main__':
np.random.seed(0)
#以下为3A
Original_samples = np.random.normal(mu, sigma, N)
Boostrap__samples = Boostrap(Original_samples,N)
Plot_hist(Original_samples,Boostrap__samples)
Statistics(Original_samples,Boostrap__samples)
#以下为3B
#Monte_Carlo,3种情况下的Monte_Carlo实验:1.改变mu,计算均值与标准差的RMSE,2.改变sigam,3.改变N
RMSE_mu_mean,RMSE_mu_std,RMSE_sigma_mean,RMSE_sigma_std,RMSE_N_mean,RMSE_N_std = Monte_Carlo(M,mu_iteration,sigma_iteration,N_iteration)
Plot_Monte_Carlo_Rsult(RMSE_mu_mean,RMSE_mu_std,RMSE_sigma_mean,RMSE_sigma_std,RMSE_N_mean,RMSE_N_std)
#3C将N改为20,程序同3A