# Linear-regression walkthrough on the insurance data: explore the data,
# fit a baseline model, then refit with engineered terms.

# psych supplies pairs.panels(); attach it up front so a missing package
# fails before any work is done.
library(psych)

# Load the data ----
# stringsAsFactors = TRUE reads character columns as factors, which lm()
# then expands into indicator variables.
insurance <- read.csv("insurance.csv", stringsAsFactors = TRUE)
str(insurance)

# Explore the response and predictors ----
# charges is the dependent variable, so look at its distribution first.
summary(insurance$charges)
hist(insurance$charges)
table(insurance$region)

# Pairwise correlations and scatterplot matrices of the numeric columns.
num_vars <- c("age", "bmi", "children", "charges")
cor(insurance[num_vars])
pairs(insurance[num_vars])
pairs.panels(insurance[num_vars])

# Baseline model ----
# lm() fits a linear regression; `charges ~ .` regresses charges on every
# other column of the data frame. Assuming insurance.csv holds only age,
# sex, bmi, children, smoker, region and charges (TODO confirm), this is
# the same model as the explicit formula
#   charges ~ age + children + bmi + sex + smoker + region
# up to the order of the terms.
ins_model <- lm(charges ~ ., data = insurance)
ins_model
summary(ins_model)

# Improved model ----
# Squared age term lets the model capture a non-linear effect of age.
insurance$age2 <- insurance$age^2
# 1/0 indicator for bmi of 30 or more.
insurance$bmi30 <- as.numeric(insurance$bmi >= 30)
# bmi30 * smoker expands to bmi30 + smoker + bmi30:smoker, i.e. both main
# effects plus their interaction.
ins_models <- lm(
  charges ~ age + age2 + children + bmi + sex + bmi30 * smoker + region,
  data = insurance
)
summary(ins_models)
![](https://i-blog.csdnimg.cn/blog_migrate/aa1287418101443243af35f20967f5b5.png)
![](https://i-blog.csdnimg.cn/blog_migrate/663e241a03c9094b4fa58c3ec47c625d.png)
![](https://i-blog.csdnimg.cn/blog_migrate/6f40fa7aa211df752d6e09fa1d741130.png)
![](https://i-blog.csdnimg.cn/blog_migrate/e6a89ef318300742aae5f3a6f3cf9180.png)
![](https://i-blog.csdnimg.cn/blog_migrate/762f490ebdb3d04caea0eff59b2ceb52.png)
![](https://i-blog.csdnimg.cn/blog_migrate/207abe7aadc3ea7d5223792091e078b3.png)
![](https://i-blog.csdnimg.cn/blog_migrate/9978310f9fc0380aa2e4478767f370c0.png)
![](https://i-blog.csdnimg.cn/blog_migrate/33c51dc66824b5fe9d4a8979ebcf8e58.png)
![](https://i-blog.csdnimg.cn/blog_migrate/c2f42e6d438f07ab710824968e25b073.png)