第一题:
- 设三维随机变量(X1,X2,X3)联合概率密度函数为fx1,x2,x3=x12+x22+x32,0≤xi≤1,i=1,2,3.
利用舍选法产生服从该分布的随机数100个(样本容量)。要求利用R软件生成该随机数100个,记录所需要的抽样次数,重复上面次数100次,绘画出100次抽样次数的箱线图,估计平均需要的抽样次数?
set.seed(123)
n_samples <- 100
n_trials <- 100
sample_counts <- numeric(n_trials)
f <- function(x1, x2, x3) {
return(x1^2 + x2^2 + x3^2)
}
max_f <- max(f(runif(1000000), runif(1000000), runif(1000000)))
for (i in 1:n_trials) {
count <- 0
while (TRUE) {
count <- count + 1
x1 <- runif(1)
x2 <- runif(1)
x3 <- runif(1)
U <- runif(1)
if (U <= f(x1, x2, x3) / max_f) {
sample_counts[i] <- count
break
}
}
}
boxplot(sample_counts, main="箱线图", ylab="抽样次数")
mean_attempts <- mean(sample_counts)
print(mean_attempts)
第二题:设总体概率密度函数为fx=α+1)xα,0<x<1,X1,X2,...,Xn,为其样本,未知参数α的矩估计量和最大似然估计分别是α=2X-11-X,α=-ni=1nln(Xi)-1,其中X是样本均值。请利用R软件在样本容量n=50以及真值α=0.7下,以MSE(均方误差)为标准,绘画出100次抽样的MSE箱线图,比较哪个更好?
set.seed(123)
n <- 50
alpha_true <- 0.7
n_samples <- 100
mse_moment <- numeric(n_samples)
mse_mle <- numeric(n_samples)
for (i in 1:n_samples) {
sample <- (alpha_true + 1) * runif(n)^alpha_true
x_bar <- mean(sample)
alpha_moment <- (2 * x_bar - 1) / (1 - x_bar)
alpha_mle <- -n / sum(log(sample)) - 1
mse_moment[i] <- (alpha_moment - alpha_true)^2
mse_mle[i] <- (alpha_mle - alpha_true)^2
}
boxplot(list(MSE_Moment = mse_moment, MSE_MLE = mse_mle),
names = c("MSE_Moment", "MSE_MLE"),
main = "MSE盒图",
ylab = "MSE")
mean_mse_moment <- mean(mse_moment)
mean_mse_mle <- mean(mse_mle)
# 输出结果
cat("Mean MSE for Moment Estimate:", mean_mse_moment, "\n")
cat("Mean MSE for MLE Estimate:", mean_mse_mle, "\n")
第三题:设X1,X2,...,Xn是来自总体N(μ,1)
的样本,现采用正态检验假设:H0:μ=0,H1:μ≠0.
请在样本容量n=50以及α=0.05
下,绘画出检验统计量nX
功效函数的图像,其中
是样本均值。并模拟估计μ=0.5
下检验统计量的功效?
n <- 50
mu <- 0.5
alpha <- 0.05
Z_alpha_2 <- qnorm(1 - alpha/2)
set.seed(123)
simulated_power <- numeric(10000)
for (i in 1:10000) {
sample <- rnorm(n, mean = mu, sd = 1)
X_bar <- mean(sample)
Z_stat <- sqrt(n) * X_bar
if (abs(Z_stat) > Z_alpha_2) {
simulated_power[i] <- 1
} else {
simulated_power[i] <- 0
}
}
power <- mean(simulated_power)
theta_values <- seq(-3, 3, length.out = 100)
power_values <- (1 - pnorm(abs(theta_values)/sqrt(n))) * 2 - 1
plot(theta_values, power_values, type = "l", col = "blue",
xlab = "Value of mu", ylab = "Power",
main = "试验统计量的幂函数")
cat("Simulated power at mu = 0.5:", power)
第四题:考虑以下定积分计算I=-11exdx,采用统计模拟方法计算:先产生100个随机数,然后再估计该定积分,最后重复200次,画出估计的箱线图,从图中比较估计方法优劣。采用的估计方法为:随机投点法、平均值法、重要抽样法(将ex
展成一阶然后正则化)、分层抽样法(将[-1,1]区间分为[-1,0]和[0,1]两层)。
set.seed(123)
n_simulations <- 200
n_points <- 100
# 定义四种估计方法
estimate_integral <- function(x, method) {
y <- exp(x)
if (method == "random_points") {
# 随机投点法
area <- sum(y) / n_points
} else if (method == "mean_value") {
# 平均值法
area <- mean(y)
} else if (method == "importance_sampling") {
# 重要抽样法
area <- sum(y / (exp(-x) + exp(x))) / 2
} else if (method == "stratified_sampling") {
# 分层抽样法
area1 <- mean(exp(runif(n_points / 2, -1, 0)))
area2 <- mean(exp(runif(n_points / 2, 0, 1)))
area <- (area1 + area2) * 2 / n_points
}
return(area)
}
results_random_points <- numeric(n_simulations)
results_mean_value <- numeric(n_simulations)
results_importance_sampling <- numeric(n_simulations)
results_stratified_sampling <- numeric(n_simulations)
# 进行模拟
for (i in 1:n_simulations) {
x <- runif(n_points, min = -1, max = 1)
results_random_points[i] <- estimate_integral(x, "random_points")
results_mean_value[i] <- estimate_integral(x, "mean_value")
results_importance_sampling[i] <- estimate_integral(x, "importance_sampling")
results_stratified_sampling[i] <- estimate_integral(x, "stratified_sampling")
}
# 绘制箱线图
par(mfrow = c(2, 2))
boxplot(results_random_points, main = "随机点法", ylab = "积分估计")
boxplot(results_mean_value, main = "平均值法", ylab = "积分估计")
boxplot(results_importance_sampling, main = "重要性抽样法", ylab = "积分估计")
boxplot(results_stratified_sampling, main = "分层抽样法", ylab = "积分估计")