# OLS regression
## fit a regression
# Ordinary least squares fit of y on x using the `sim1` data set.
sim1_mod <- lm(y ~ x, data = sim1)

## extract the results
# Predicted (fitted) values.
fitted(sim1_mod)
# Residuals. The accessor is used instead of `summary(sim1_mod)$residual`,
# which only worked through partial matching of `$` on the name `residuals`.
residuals(sim1_mod)
# Estimated intercept and slope, selected by name rather than by position in
# the coefficient matrix.
coef(sim1_mod)[["(Intercept)"]]
coef(sim1_mod)[["x"]]
# Residual standard error of the fit.
summary(sim1_mod)$sigma
# 95% confidence interval for the slope, computed once and then indexed.
slope_ci <- confint(sim1_mod, "x", level = 0.95)
slope_ci
slope_ci[1] # lower bound
slope_ci[2] # upper bound
## draw a fitted plot
# Scatter plot of `sim1` with a linear-model smooth drawn on top.
# `se = FALSE` (spelled out, since `F` can be reassigned) turns off the
# confidence band around the line.
ggplot(sim1, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## repetition
# Fit 100 regressions, each on a fresh sample of 10 random (x, y) pairs, and
# keep the estimated slope of every fit as one element of a list.
# `replicate(..., simplify = FALSE)` replaces `purrr::rerun()`, which is
# deprecated since purrr 1.0.0; the slope is selected by name.
toy_slope <- replicate(
  100,
  coef(lm(y ~ x, data = tibble(x = rnorm(10), y = rnorm(10))))[["x"]],
  simplify = FALSE
)
# The data set must be created inside the repeated expression: if it were
# built once beforehand, every repetition would fit the same data and return
# the same slope.
# Flatten the list into a numeric vector so that its mean or variance can be
# calculated.
unlist(toy_slope)
## visualization
# Simulate 1000 slopes, each from a regression on 100 random (x, y) pairs.
# `replicate()` returns the slopes directly as a numeric vector and replaces
# the deprecated `purrr::rerun()` + `as_vector()` combination.
toy_slope <- replicate(
  1000,
  coef(lm(y ~ x, data = tibble(x = rnorm(100), y = rnorm(100))))[["x"]]
)
# Histogram of the simulated slopes.
ggplot(data.frame(toy_slope)) +
  geom_histogram(aes(toy_slope), binwidth = 0.01)
# Below are miscellaneous, loosely organized commands
## basics
# Regress y on x using the data set `sim1`; the fitted model is printed.
lm(formula = y ~ x, data = sim1)

# Helpers used in the example below:
#   `add_predictions()` adds predictions
#   `add_residuals()` adds residuals
sim1_mod <- lm(formula = y ~ x, data = sim1)

# `data_grid()` extracts the unique values of x; predictions from the model
# are then added to that grid.
add_predictions(data_grid(sim1, x), sim1_mod)

# Residuals of the model added to the original data.
add_residuals(sim1, sim1_mod)
## interactions (one categorical and one continuous predictor)
# Two candidate models for `sim3`: additive terms only (mod1) and with the
# x1-by-x2 interaction (mod2).
mod1 <- lm(y ~ x1 + x2, data = sim3)
mod2 <- lm(y ~ x1 * x2, data = sim3)

# Predictions from both models over the grid of x1 and x2 values.
grid <- gather_predictions(data_grid(sim3, x1, x2), mod1, mod2)

# Show the effect of each model: observed points with the predicted lines
# drawn on top, one panel per model.
ggplot(data = sim3, mapping = aes(x = x1, y = y, colour = x2)) +
  geom_point() +
  geom_line(mapping = aes(y = pred), data = grid) +
  facet_wrap(~model)
## interactions (two continuous predictors)
# `seq_range()` generates `n` evenly spaced values between the smallest and
# the biggest value of its input. The calls are written as runnable code
# (no console prompt); the recorded output follows each call as a `#>` comment.
seq_range(c(0.0123, 0.9871), n = 5)
#> [1] 0.0123 0.2560 0.4997 0.7434 0.9871
seq_range(c(0.0123, 1.9871), n = 5, pretty = TRUE) # make values "pretty"
#> [1] 0.0 0.5 1.0 1.5 2.0
seq_range(c(0.0123, 1.9871), n = 5, trim = 0.1) # trim the range by 10%
#> [1] 0.11104 0.55537 0.99970 1.44403 1.88836
seq_range(c(0.0123, 1.9871), n = 5, expand = 0.1) # expand the range by 10%
#> [1] -0.08644 0.45663 0.99970 1.54277 2.08584
# predict
# Additive (mod1) and interaction (mod2) models fitted to `sim4`.
mod1 <- lm(y ~ x1 + x2, data = sim4)
mod2 <- lm(y ~ x1 * x2, data = sim4)

# Predictions from both models on a grid built from five `seq_range()` values
# of x1 and five of x2.
grid <- gather_predictions(
  data_grid(
    sim4,
    x1 = seq_range(x1, n = 5),
    x2 = seq_range(x2, n = 5)
  ),
  mod1,
  mod2
)