# 顺序:
# 1. 收集数据
# 2. 探索数据和准备数据
# 3. 基于数据训练模型
# 4. 评价模型的性能
# 5. 改进模型的性能
# 第1、2步为两个表格文件
# 3.
# Load the used-car listings from the working directory; text columns are
# kept as character vectors instead of being converted to factors.
usedcars <- read.csv("usedcars.csv", stringsAsFactors = FALSE)

# Show the structure of the loaded data frame (columns and their types).
str(usedcars)

# Summary statistics for the price and mileage columns.
summary(usedcars[, c("price", "mileage")])

# Spread of price: min/max, width of that range, and interquartile range.
range(usedcars[["price"]])
diff(range(usedcars[["price"]]))
IQR(usedcars[["price"]])

# Visualize the distribution of price.
boxplot(
  usedcars[["price"]],
  main = "Boxplot of Usedcars price",
  ylab = "Price($)"
)
hist(
  usedcars[["price"]],
  main = "Hist of Usedcars price",
  xlab = "Price($)"
)

# Variance and standard deviation of price.
var(usedcars[["price"]])
sd(usedcars[["price"]])
cor(usedcars[c("price","mileage","