R语言手册(第三站 探索性数据分析)

R语言手册(第三站 探索性数据分析)

标签: R语言

1.读入churn数据集

# Load the churn data set. stringsAsFactors = TRUE makes text columns factors,
# so summary(churn$Churn) below returns a count per level.
churn <- read.csv(file = "C:/.../churn.txt", stringsAsFactors = TRUE)

# Show the first 10 records
churn[1:10, ]

# Summarize the churn variable (count per level)
sum.churn <- summary(churn$Churn)
sum.churn

# Proportion of customers who churned
prop.churn <- sum(churn$Churn == "True") / length(churn$Churn)
prop.churn

2.流失变量的条形图

# Bar chart of the churn counts held in sum.churn
barplot(sum.churn,
        ylim = c(0, 3000),
        main = "Bar Graph of Churners and Non-Churners",
        col = "lightblue")
# Draw a solid black frame around the plot region (the parameter is `lty`)
box(which = "plot", lty = "solid", col = "black")

输出:image_1d96igi0d1e3ualidpu1f334ts9.png-59.9kB

3.为客户流失和国际套餐的计数建表

# Two-way contingency table: rows are Churn, columns are International Plan.
# dnn supplies the dimension names shown when the table is printed.
counts <- table(churn$Churn,
                churn$Int.l.Plan,
                dnn = c("Churn", "International Plan"))
counts

输出:image_1d96ij82fptr9oh12uncce12vam.png-11.5kB

4.叠加柱状图

# Stacked bar chart: one bar per International Plan level (table columns),
# segmented by Churn (table rows), which is why the legend uses rownames(counts).
barplot(counts,
        legend = rownames(counts),
        col = c("blue", "red"),
        ylim = c(0, 3300),
        ylab = "Count",
        xlab = "International Plan",
        main = "Comparison Bar Chart: Churn Proportions by International Plan")
# Draw a solid black frame around the plot region (the parameter is `lty`)
box(which = "plot", lty = "solid", col = "black")

输出:image_1d96iml0g1dd61m18icm92qjna13.png-50.3kB

5.创建两个变量的汇总表

# Append row and column totals to the Churn x International Plan table
sumtable <- addmargins(A = counts, FUN = sum)
sumtable

6.创建分行比例表

输出:image_1d96k5q5pb5ils87pa1u101b5v19.png-13.4kB

# Row-wise percentages: each row of counts sums to 100
# (proportions are rounded to 4 decimals before scaling to percent).
row.margin <- round(prop.table(counts, margin = 1), 4) * 100
row.margin

7.创建分列的比例表

输出:image_1d96k62gi2161unc18gi6pi120i1m.png-10.7kB

# Column-wise percentages: each column of counts sums to 100
# (proportions are rounded to 4 decimals before scaling to percent).
col.margin <- round(prop.table(counts, margin = 2), 4) * 100
col.margin

输出:image_1d96k695o1ktk187h13hp99i58b23.png-11.8kB

8.带有图例的聚类条形图

# Clustered bar chart. After transposing, each group on the x axis is a Churn
# level and the bars inside a group are the International Plan levels.
barplot(t(counts),
        col = c("blue", "green"),
        ylim = c(0, 3300),
        ylab = "Counts",
        xlab = "Churn",
        main = "International Plan Count by Churn",
        beside = TRUE)
# The coloured bars are International Plan levels, i.e. the columns of counts,
# so the legend labels must come from colnames(counts), not rownames(counts).
legend("topright", colnames(counts),
       col = c("blue", "green"),
       pch = 15,
       title = "Intl Plan")
# Draw a solid black frame around the plot region (the parameter is `lty`)
box(which = "plot", lty = "solid", col = "black")

输出:image_1d96k6ons1hn21e0k3dt1huq1hf030.png-44.7kB

9.客户服务呼叫的非覆盖直方图

# Histogram of the number of customer service calls
hist(churn$CustServ.Calls,
     xlim = c(0, 10),
     col = "lightblue",
     ylab = "Count",
     xlab = "Customer Service Calls",
     main = "Histogram of Customer Service Calls")

输出:image_1d96ne5041dlhd1a62f6ib6qo3d.png-42.6kB

10.下载并安装R包ggplot2

# Install ggplot2 from CRAN (only needed once per R installation)
install.packages("ggplot2")
# Pick any CRAN mirror
# (see example image)
# Open the new package
library(ggplot2)

11.覆盖条形图

# Overlaid (stacked) bar chart of customer service calls, filled by churn.
# Columns are referenced by bare name inside aes() because `data = churn`
# already supplies the data frame.
# NOTE(review): position = "stack" plots counts, yet the y axis is labelled
# "Percent" as in the source text - confirm the intended label.
ggplot() +
  geom_bar(data = churn,
           aes(x = factor(CustServ.Calls),
               fill = factor(Churn)),
           position = "stack") +
  scale_x_discrete("Customer Service Calls") +
  scale_y_continuous("Percent") +
  guides(fill = guide_legend(title = "Churn")) +
  scale_fill_manual(values = c("blue", "red"))

输出:image_1d96nipsg1nlaithaag1d9j1ma63q.png-86.1kB

# Normalized bar chart: position = "fill" scales every bar to height 1, so the
# fill colours show the churn proportion at each number of service calls.
ggplot() +
  geom_bar(data = churn,
           aes(x = factor(CustServ.Calls),
               fill = factor(Churn)),
           position = "fill") +
  scale_x_discrete("Customer Service Calls") +
  scale_y_continuous("Percent") +
  guides(fill = guide_legend(title = "Churn")) +
  scale_fill_manual(values = c("blue", "red"))

输出:image_1d96nl8ebjugb8hnth143k1p1a47.png-77.9kB

12.t-检验和国际电话的两个例子

#数据分割
churn. false<-subset(churn, churn$Churn=="False")
churn. true<-subset(churn, churn$Churn=="True")
#运行检验
t. test(churn.falseSIntl.Calls, churn.trueSIntl.Calls)

输出:image_1d96nr1eboqta1eec81srt8vn4k.png-37.3kB

13.傍晚使用时长和白天使用时长的散点图,将客户流失着色

# Scatterplot of evening minutes (x) against day minutes (y);
# churners are drawn in red, non-churners in blue.
plot(churn$Eve.Mins,
     churn$Day.Mins,
     xlim = c(0, 400),
     ylim = c(0, 400),
     xlab = "Evening Minutes",
     ylab = "Day Minutes",
     main = "Scatterplot of Day and Evening Minutes by Churn",
     # `==` compares element-wise; a single `=` would be parsed as a named
     # argument to ifelse() and fail
     col = ifelse(churn$Churn == "True", "red", "blue"))
legend("topright",
       c("True", "False"),
       col = c("red", "blue"),
       pch = 1,
       title = "Churn")

输出:image_1d96nt3etc5vqti1vv41c06137q51.png-71.4kB

14.白天使用时长和客户服务电话量的散点图,将客户流失着色

# Scatterplot of day minutes (x) against customer service calls (y);
# churners are red with symbol 16, non-churners blue with symbol 20.
plot(churn$Day.Mins,
     churn$CustServ.Calls,
     xlim = c(0, 400),
     xlab = "Day Minutes",
     ylab = "Customer Service Calls",
     main = "Scatterplot of Day Minutes and Customer Service Calls by Churn",
     # `==` compares element-wise; a single `=` would be parsed as a named
     # argument to ifelse() and fail
     col = ifelse(churn$Churn == "True", "red", "blue"),
     pch = ifelse(churn$Churn == "True", 16, 20))
legend("topright",
       c("True", "False"),
       col = c("red", "blue"),
       pch = c(16, 20),
       title = "Churn")

输出:image_1d96o15k11c2de481angq8q1el05e.png-55.2kB

15.散点图矩阵

# Scatterplot matrix of the three daytime variables. The one-sided formula
# (leading `~`) is required: without it the `+` would add the three columns
# together into a single vector instead of naming them as separate variables.
pairs(~ Day.Mins + Day.Calls + Day.Charge, data = churn)

输出:image_1d96o31nf15ev1cttm6vmq5bgp5r.png-71.4kB

16.白天费用和白天使用时长的回归分析

# Simple linear regression of day charge on day minutes
fit <- lm(Day.Charge ~ Day.Mins, data = churn)
summary(fit)

输出:image_1d96o45rf1ck2rmj1i1kn6lm4p68.png-54.5kB

16.相关值和p值

# Collect the three daytime variables as columns of one matrix
days <- cbind(churn$Day.Mins,
              churn$Day.Calls,
              churn$Day.Charge)
# Pairwise correlation tests between the three variables
MinsCallsTest <- cor.test(churn$Day.Mins, churn$Day.Calls)
MinsChargeTest <- cor.test(churn$Day.Mins, churn$Day.Charge)
CallsChargeTest <- cor.test(churn$Day.Calls, churn$Day.Charge)
# Correlation matrix, rounded to 4 decimals
round(cor(days), 4)
# p-value of each pairwise test
MinsCallsTest$p.value
MinsChargeTest$p.value
CallsChargeTest$p.value

输出:image_1d96orgk9dpsbvh13lv1otf77572.png-32.3kB

17.相关值和p值以矩阵形式展现

#收集感兴趣的变量
corrdata<-cbind(churn$Account.Length,
                churn$VMail.Message, 
                churn$Day.Mins, 
                churn$Day.Calls, 
                churn$CustServ.Calls)
#声明矩阵
corrpvalues<-matrix(rep(0,25), ncol=5)
#使用相关系数填充矩阵
for(i in 1:4){
    for (G in(i+l):5){
            corrpvalues[i,j]<-
            corrpvalues[j,i]<-
            round(cor.test(corrdata[,i], corrdata[j])$p.value,4) 
    }
}
round(cor(corrdata),4)
corrpvalues

输出:image_1d96omvkh1n1718en147j1pnfo06l.png-61.3kB

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值