# Load the churn data set; character columns are read in as factors.
churn <- read.csv(file = "../churn.txt", stringsAsFactors = TRUE)

# Preview the first ten records (head() does not pad short data with NA rows).
head(churn, n = 10)

# Number of customers in each churn category.
sum.churn <- summary(churn$Churn)

# Proportion of churners, derived from the counts rather than hard-coded
# totals. The churn level is matched by prefix so that both "True" and
# "True." (trailing period, as printed in the tables in this file) are found.
prop.churn <- sum(sum.churn[grepl("^True", names(sum.churn))]) / sum(sum.churn)
# Cross-tabulate churn status (rows) against international plan (columns).
counts <- table(
  churn$Churn, churn$Int.l.Plan,
  dnn = c("Churn", "International Plan")
)
counts

# Stacked bar chart: churn counts within each international-plan group.
# legend.text is spelled out in full instead of relying on partial matching
# of `legend=`.
barplot(
  counts,
  legend.text = rownames(counts),
  col = c("blue", "red"),
  ylim = c(0, 3300),
  ylab = "Count",
  xlab = "International Plan",
  main = "Comparison Bar Chart: Churn Proportions by International Plan"
)
box(which = "plot", lty = "solid", col = "black")
# > counts
#         International Plan
# Churn      no  yes
#   False. 2664  186
#   True.   346  137
# 图 参与国际套餐的客户流失比例
# 创建两个变量的汇总表:
# Append row and column totals to the churn-by-plan contingency table.
sumtable <- addmargins(A = counts, FUN = sum)
# > sumtable
#         International Plan
# Churn      no  yes  sum
#   False. 2664  186 2850
#   True.   346  137  483
#   sum    3010  323 3333
# 带有图例的聚类条形图:
# Clustered (side-by-side) bar chart of churn counts per international-plan
# group. Bars within each group follow the rows of `counts`, i.e. the churn
# levels, so the legend below is keyed on churn.
barplot(
  counts,
  col = c("blue", "red"),
  ylim = c(0, 3300),
  ylab = "Count",
  xlab = "International Plan",
  main = "International Plan Count by Churn",
  beside = TRUE
)

# Single explicit legend: the barplot() call no longer draws its own, the
# colours match the bar colours, and the title names what the entries are
# (churn levels, not international-plan levels).
legend(
  "topright",
  legend = rownames(counts),
  col = c("blue", "red"),
  pch = 15,
  title = "Churn"
)
box(which = "plot", lty = "solid", col = "black")
# 客户服务呼叫的非覆盖直方图
# Plain (non-overlaid) histogram of the customer service call counts.
hist(
  churn$CustServ.Calls,
  main = "Histogram of Customer Service Calls",
  xlab = "Customer Service Calls",
  ylab = "Count",
  col = "lightblue",
  xlim = c(0, 10)
)
# 覆盖条形图
library(ggplot2)
# Stacked bar chart of customer service calls, with bars split by churn.
# The data frame is supplied once to ggplot() and columns are referenced by
# bare name inside aes(); using `churn$...` inside aes() is discouraged by
# ggplot2 because it bypasses the plot's data.
ggplot(data = churn) +
  geom_bar(
    aes(x = factor(CustServ.Calls), fill = factor(Churn)),
    position = "stack"
  ) +
  scale_x_discrete("Customer Service Calls") +
  guides(fill = guide_legend(title = "Churn")) +
  scale_fill_manual(values = c("blue", "red"))
# 傍晚使用时长和白天使用时长的散点图,将客户流失着色
# Scatterplot of day minutes against evening minutes, coloured by churn.
# Churners are identified by prefix match: the tables printed in this file
# show the levels as "False." / "True." (trailing period), so an exact
# comparison with "False" or "True" never matches and every point would get
# the same colour.
is.churner <- grepl("^True", churn$Churn)

plot(
  churn$Eve.Mins, churn$Day.Mins,
  xlim = c(0, 400),
  ylim = c(0, 400),
  xlab = "Evening Minutes",
  ylab = "Day Minutes",
  main = "Scatterplot of Day and Evening Minutes by Churn",
  # Churners red, non-churners blue, in agreement with the legend below.
  col = ifelse(is.churner, "red", "blue")
)
legend(
  "topright",
  legend = c("True", "False"),
  col = c("red", "blue"),
  pch = 1,
  title = "Churn"
)
# 注:具体代码将churn$Churn=="False"改为churn$Churn=="True"
# 白天使用时长和客户服务电话量的散点图,将客户流失着色
# Scatterplot of customer service calls against day minutes, with churn shown
# by both colour and plotting symbol.
# Churners are identified by prefix match: the tables printed in this file
# show the levels as "False." / "True." (trailing period), so an exact
# comparison with "True" never matches and all points would look alike.
churned <- grepl("^True", churn$Churn)

plot(
  churn$Day.Mins, churn$CustServ.Calls,
  xlim = c(0, 400),
  xlab = "Day Minutes",
  ylab = "Customer Service Calls",
  main = "Scatterplot of Day Minutes and Customer Service Calls by Churn",
  col = ifelse(churned, "red", "blue"),
  pch = ifelse(churned, 16, 20)
)
legend(
  "topright",
  legend = c("True", "False"),
  col = c("red", "blue"),
  pch = c(16, 20),
  title = "Churn"
)
# 白天费用和白天使用时长的回归分析
# Simple linear regression of day charge on day minutes.
# Columns are named in the formula and the data frame is passed via `data`,
# so the coefficient is labelled "Day.Mins" (not "churn$Day.Mins") and the
# fitted model can be used with predict(fit, newdata = ...).
fit <- lm(Day.Charge ~ Day.Mins, data = churn)
summary(fit)
# churn数据集见资源