#### packages
# Install only the packages that are not already available, so re-running the
# script does not re-download and reinstall everything each time.
# NOTE(review): DMwR may no longer be available from CRAN; confirm that it can
# be installed from the repository you use.
required_pkgs <- c("ggplot2", "ROCR", "glmnet", "Metrics", "DMwR", "Rcpp")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
library(ggplot2)
library(ROCR)
library(glmnet)
library(Metrics)
#### Input
# Read the marketing data set from the working directory.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0.0, where
# the read.csv() default changed to FALSE; on older R it is the default.
# NOTE(review): the DMwR::SMOTE call further down appears to need factor
# (not character) columns -- confirm against the actual CSV schema.
marketing <- read.csv("marketing.csv", stringsAsFactors = TRUE)
# Quick look at the first rows and per-column summaries.
head(marketing)
summary(marketing)
#### Data Visualization #############################
### Average age for each occupation
# One bar per job; the bar height is the mean of `age` within that job.
# `fun` is used instead of `fun.y`, which is deprecated since ggplot2 3.3.0.
ggplot(marketing, aes(job, age)) +
  geom_bar(
    stat = "summary", fun = "mean",
    color = "black", fill = "grey", width = 0.5
  ) +
  theme_bw() +
  labs(y = "Age", title = "Age Distribution") +
  # Centre the title (and subtitle, should one be added).
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
# Mean age per job, with the bars filled by the value of column `y`.
# `fun` is used instead of `fun.y`, which is deprecated since ggplot2 3.3.0.
ggplot(marketing, aes(job, age, fill = y)) +
  geom_bar(stat = "summary", fun = "mean", width = 0.5) +
  theme_bw() +
  labs(y = "Age", title = "Age Distribution")
# Mean age per job filled by `y`, drawn as one panel per `marital` value.
# `fun` is used instead of `fun.y`, which is deprecated since ggplot2 3.3.0.
ggplot(marketing, aes(job, age, fill = y)) +
  geom_bar(stat = "summary", fun = "mean", width = 0.5) +
  # The `+` after facet_wrap() is required: without it the theme and labels
  # below form a separate expression and are never applied to this plot.
  facet_wrap(~ marital) +
  theme_bw() +
  labs(y = "Age", title = "Age Distribution")
# NOTE(review): the script originally ended this section with a bare
# `geom_density()` call that was not attached to any plot, so it drew nothing.
# It is left disabled here; build a proper ggplot() around it if a density
# plot is wanted.
# geom_density()
#### Data preparation ##################################
## Training and Testing
# 70/30 split performed separately for the "yes" and "no" rows, so each class
# contributes 70% of its rows to the training set.
data_y <- marketing[marketing$y == "yes", ]
data_n <- marketing[marketing$y == "no", ]
set.seed(1234)
ysub <- sample(nrow(data_y), floor(nrow(data_y) * 0.7))
nsub <- sample(nrow(data_n), floor(nrow(data_n) * 0.7))
train_yes <- data_y[ysub, ]
train_no <- data_n[nsub, ]
# setdiff() is used instead of negative indexing: x[-integer(0), ] selects
# zero rows, which would silently empty the test set for a class so small
# that nothing was sampled for training.
test_yes <- data_y[setdiff(seq_len(nrow(data_y)), ysub), ]
test_no <- data_n[setdiff(seq_len(nrow(data_n)), nsub), ]
# Recode the target as 1 ("yes") / 0 (anything else) in both sets.
train <- rbind(train_yes, train_no)
train$y <- ifelse(train$y == "yes", 1, 0)
test <- rbind(test_yes, test_no)
test$y <- ifelse(test$y == "yes", 1, 0)
# Sanity check: number of rows that ended up in neither set (expected 0).
nrow(marketing) - nrow(train) - nrow(test)
# Class proportions in the training set.
print(prop.table(table(train$y)))
#### Explore SMOTE
library(DMwR)
X <- nrow(train_no)
Y <- nrow(train_yes)
# The percentages below divide by Y and by (X - Y); they are only meaningful
# when "yes" is non-empty and "no" is the strictly larger class. Otherwise
# they come out NaN, infinite or negative, so fail early with a clear error.
stopifnot(Y > 0, X > Y)
# NOTE(review): presumably chosen so that SMOTE creates about (X - Y)
# synthetic "yes" rows and keeps about X "no" rows, giving balanced classes --
# confirm against the DMwR::SMOTE documentation.
perc.over <- (X - Y) * 100 / Y
perc.under <- X * 100 / (X - Y)
# The target is converted to a factor before being passed to SMOTE.
train$y <- as.factor(train$y)
train_bal <- SMOTE(
  y ~ ., train,
  perc.over = perc.over, perc.under = perc.under
)
# Class proportions after rebalancing.
print(prop.table(table(train_bal$y)))
################## Model result function | |
modelperf<- function(ypredict, ytrue, cutoff) { | |
library(ROCR) | |
## | |
ypredict <- as.numeric(ypredict) | |
ytrue<- as.numeric(as.character(ytrue)) | |
yresult<- ifelse(ypredict > cutoff, 1,0) | |
accuracy <- 1 - mean(yresult != ytrue) | |
ypredict |
R语言对于Machine learning的代码以及研究
最新推荐文章于 2020-08-12 19:16:31 发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)