R语言|计算统计作业练习1

下面在此分享一下一次课程作业的答题思路及个人答题结果。如有错误欢迎指正。

*本文题目非原创。

Q:对于law数据计算GPA和 LSAT的相关系数的偏差,并计算经过偏差调整后的相关系数以及相关系数的置信区间(使用非参数的bootstrap,置信区间需要使用percentile 和BCa的方法,R和Python编程选一种使用)。

一、数据查看

LSAT

GPA

576

3.39

635

3.3

558

2.81

578

3.03

666

3.44

580

3.07

555

3

661

3.43

651

3.36

605

3.13

653

3.12

575

2.74

545

2.76

572

2.88

594

2.96

二、相关系数的偏差计算

我们运用非参数的bootstrap算法进行计算。

将不同B次下计算的结果填入下表并绘制曲线图如下:

B次

25

50

100

200

400

800

1600

3200

bias

0.018980788

0.009771031

-0.031498300

-0.013222192

-0.008180624

-0.004801327

-0.010277039

-0.008612720

三、经偏差调整后的相关系数计算

根据θ=θ-bias可算出经过偏差调整后的相关系数为:

B次

25

50

100

200

400

800

1600

3200

r

0.7573937

0.7666035

0.8078728

0.7895967

0.7845551

0.7811758

0.7866515

0.7849872

四、相关系数的置信区间计算

B

置信区间图

25

50

100

200

400

800

1600

3200

 

 五、附录

set.seed(11)
# Nonparametric bootstrap for the GPA_LSAT data

rm(list = ls())
# install.packages('bootstrap')
library(bootstrap)
library(plotly)
data(law82) 
#names(law)
# plot the scatter 
p=plot_ly(law82,x=~LSAT,y=~GPA,type="scatter",mode="markers");
p

# ==============================================================================
# bootstrap to calculate the bias of correlation coefficients
data(law)
data_treat=law
n=dim(data_treat)[1]
B=c(25,50,100,200,400,800,1600,3200)
corr_sample = cor(data_treat$LSAT,data_treat$GPA) # Sample correlation


corr_bias_boot=NULL    # bootstrap bias vector
corr_correct=NULL
for(i in 1:length(B))
{
  corr_boot=NULL     # bootstrap samples vector for specific B
  for(b in 1:B[i])
  {
    boot_ID=sample(1:n,n,replace=T) # nonparametric bootstrap
    data_boot=data_treat[boot_ID,]  
    corr_boot=c(corr_boot,cor(data_boot$LSAT,data_boot$GPA))
  }
  corr_bias_boot=c(corr_bias_boot,(sum(corr_boot))/B[i]-corr_sample)
  corr_correct = c(corr_correct, corr_sample-(sum(corr_boot))/B[i]+ corr_sample)
}

# Show the scatter plot
corr_correct
boot_result=data.frame(B=B,corr_correct=corr_correct) 
p=plot_ly(boot_result,x=~B,y=~corr_correct,type="scatter",mode="markers");
p%>%add_lines() 


# Show the scatter plot
corr_bias_boot
boot_result=data.frame(B=B,corr_bias_boot=corr_bias_boot) 
p=plot_ly(boot_result,x=~B,y=~corr_bias_boot,type="scatter",mode="markers");
p%>%add_lines() 

# Show the histogram
p=plot_ly(x=~corr_boot,type="histogram")%>%
  layout(title = "histogram of bootstrap samples",  
         yaxis = list(title = "freq"))
p

# ==============================================================================
# improved estimate of corr

B=3200 # B=c(25,50,100,200,400,800,1600,3200)
n=15
data_treat=law
alpha = 0.1
corr_boot=NULL     # bootstrap samples vector for specific B
corr_correct=NULL
for(b in 1:B)
{
    boot_ID=sample(1:n,n,replace=T) # nonparametric bootstrap
    data_boot=data_treat[boot_ID,]  
    corr_boot=c(corr_boot,cor(data_boot$LSAT,data_boot$GPA))
}

# ==============================================================================
# bootstrap-percentile CI         
CI_boot_per_l = sort(corr_boot)[B*alpha/2]
CI_boot_per_r = sort(corr_boot)[B*(1-alpha/2)]


# BCa
corr_sample = cor(law$LSAT,law$GPA) # Sample correlation

z_0 = qnorm(length(corr_boot[corr_boot < corr_sample])/B)
theta_i = NULL
for(i in 1:n)
{
  x_i = data_treat[-i]
  theta_i = c(theta_i,var(x_i))
}
theta_hat = mean(theta_i)
hat_a = (sum((theta_hat-theta_i)^3))/(6*(sum((theta_hat-theta_i)^2))^(3/2))

alpha_1 = pnorm(z_0+((z_0+qnorm(alpha/2))/(1-hat_a*(z_0+qnorm(alpha/2)))))
alpha_2 = pnorm(z_0+((z_0+qnorm(1-alpha/2))/(1-hat_a*(z_0+qnorm(1-alpha/2)))))

# BCa CI         
CI_boot_BCa_l = sort(corr_boot)[B*alpha_1]
CI_boot_BCa_r = sort(corr_boot)[B*alpha_2]

# plot      
hist(corr_boot, main = "Histogram of the bootstrap samples and CI")
abline(v = c(CI_boot_per_l,CI_boot_per_r), lty = 1, col = 'red')
abline(v = c(CI_boot_BCa_l,CI_boot_BCa_r), lty = 1, col = 'blue')
legend("topright", c("boot-per","boot_BCa"), lty = c(1,1), col = c("red","blue"))

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值