Modeling

最新推荐文章于 2021-11-12 16:56:35 发布

向爱我

最新推荐文章于 2021-11-12 16:56:35 发布

阅读量122

点赞数

分类专栏： R

本文链接：https://blog.csdn.net/weixin_51674826/article/details/117111804

版权

R 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

这篇博客介绍了如何使用OLS回归进行数据建模。通过`lm`函数建立y与x的关系，提取预测值、残差、截距和斜率，并计算标准误差。还展示了如何绘制拟合图以及重复实验来研究参数稳定性。此外，文章探讨了交互项在模型中的作用，包括一元和多元交互效应，并利用`seq_range`生成预测值网格。最后，通过`ggplot2`进行数据可视化，展示不同模型的效果。

摘要由CSDN通过智能技术生成

OLS regression

## Fit an OLS regression of y on x using the `sim1` data
sim1_mod <- lm(y ~ x, data = sim1)

## Extract results from the fitted model
# Accessor functions are used instead of `summary(...)$name`, which for
# `$residual` only worked through partial matching of list names.
fitted(sim1_mod)     # predicted (fitted) values
residuals(sim1_mod)  # residuals
coef(sim1_mod)[[1]]  # estimated intercept
coef(sim1_mod)[[2]]  # estimated slope
sigma(sim1_mod)      # residual standard error

# 95% confidence interval for the slope: computed once, then indexed
x_ci <- confint(sim1_mod, "x", level = 0.95)
x_ci
x_ci[1]  # lower bound
x_ci[2]  # upper bound

## Draw the data as a scatter plot with the fitted OLS line on top
ggplot(sim1, aes(x, y)) +
  geom_point() +
  # `se = FALSE` hides the confidence band; `FALSE` is spelled out because
  # the shorthand `F` is an ordinary variable that can be reassigned
  geom_smooth(method = "lm", se = FALSE)

## Repetition: fit the regression 100 times on fresh random data and keep
## the slope of each fit
# `map()` over `seq_len(100)` replaces `rerun()`, which is deprecated since
# purrr 1.0.0.
# The tibble must be created inside the function: building the dataset once
# beforehand would reuse the same data, so every repetition would return the
# same slope.
toy_slope <- map(
  seq_len(100),
  function(i) coef(lm(y ~ x, tibble(x = rnorm(10), y = rnorm(10))))[[2]]
)
as_vector(toy_slope) # flatten the list to a vector so mean/variance can be computed

## Visualization: histogram of 1000 simulated slopes (100 observations each)
# `map_dbl()` returns the numeric vector directly, replacing the deprecated
# `rerun()` followed by `as_vector()`. The tibble is built inside the function
# so that each repetition draws new random data.
toy_slope <- map_dbl(
  seq_len(1000),
  function(i) coef(lm(y ~ x, tibble(x = rnorm(100), y = rnorm(100))))[[2]]
)
ggplot(data.frame(toy_slope)) +
  geom_histogram(aes(toy_slope), binwidth = 0.01)

Below are miscellaneous commands

## Basics
# Regress y on x using the `sim1` dataset (result is printed, not stored)
lm(formula = y ~ x, data = sim1)

# `add_predictions()` adds predictions to a data frame
# `add_residuals()` adds residuals to a data frame
# Example:
sim1_mod <- lm(formula = y ~ x, data = sim1)

# `data_grid()` extracts the unique values of x; predictions are then added
# for each of those values
add_predictions(data_grid(sim1, x), sim1_mod)

# Residuals are added to the original observations
add_residuals(sim1, sim1_mod)

## Interactions (one "CA" and one "CO" predictor; presumably categorical and
## continuous)

# Compare two models: additive (`+`) versus with interaction (`*`)
mod1 <- lm(formula = y ~ x1 + x2, data = sim3)
mod2 <- lm(formula = y ~ x1 * x2, data = sim3)

# Predictions of both models over every x1/x2 combination in the data;
# the plot below reads the `pred` and `model` columns of this table
grid <- gather_predictions(data_grid(sim3, x1, x2), mod1, mod2)

# Observations as points, model predictions as lines, one panel per model
ggplot(data = sim3, mapping = aes(x = x1, y = y, colour = x2)) +
  geom_point() +
  geom_line(mapping = aes(y = pred), data = grid) +
  facet_wrap(~model)

## Interactions (two "CO" predictors; presumably "continuous")

# `seq_range()` generates `n` evenly spaced values between the smallest and the largest value of its input
# (console transcript: lines starting with `>` are the input, lines starting with `[1]` are the printed result)
> seq_range(c(0.0123,0.9871),n=5)
[1] 0.0123 0.2560 0.4997 0.7434 0.9871
> seq_range(c(0.0123,1.9871),n=5, pretty=T) # produce round, "pretty" values
[1] 0.0 0.5 1.0 1.5 2.0
> seq_range(c(0.0123,1.9871),n=5, trim=0.1) # trim the range by 10% (the result starts later and ends earlier)
[1] 0.11104 0.55537 0.99970 1.44403 1.88836
> seq_range(c(0.0123,1.9871),n=5, expand=0.1) # expand the range by 10% (the result extends past both ends)
[1] -0.08644  0.45663  0.99970  1.54277  2.08584

# Predict: fit the additive and the interaction model on `sim4`, then collect
# the predictions of both over a 5 x 5 grid of values spanning x1 and x2
mod1 <- lm(formula = y ~ x1 + x2, data = sim4)
mod2 <- lm(formula = y ~ x1 * x2, data = sim4)
grid <- gather_predictions(
  data_grid(
    sim4,
    x1 = seq_range(x1, n = 5),
    x2 = seq_range(x2, n = 5)
  ),
  mod1,
  mod2
)