今天快下班的时候,业务临时提出了个需求,想看到网站跳出率分析数据,于是不得不用ggplot2快速画了几个图。中途还出了点差错:reshape包中的melt函数太久没用,忘了用法,耽误了不少时间。不多说,直接上ggplot的代码。代码实现的是散点连线图,有很多细节方面的东西,有心情的话可以看看:
library(RMySQL)
library(ggplot2)
library(reshape)
# 文件操作
# list.files(pattern=("\\.pdf$"))
# file.remove( list.files(pattern=("\\.pdf$")))
# file.exists("uv走势图total.pdf")
# Load the daily stay-time and bounce statistics from MySQL.
#
# NOTE(review): the credentials used to be hard-coded in the dbConnect() call.
# They can now be supplied through the PMS_DB_USER / PMS_DB_PASSWORD
# environment variables. The literal fallbacks only keep the script behaving
# as before; they should be removed (and the password rotated) once the
# environment variables are in place.
db_user <- Sys.getenv("PMS_DB_USER", unset = "gaoyang922")
db_password <- Sys.getenv("PMS_DB_PASSWORD", unset = "gaoyang922@123456!")

# Average stay time per insert_date for pages whose URL matches `url_pattern`
# (a SQL LIKE pattern). Each visit's stay_time is capped at 1800 before
# averaging. insert_date is the first 8 characters of insert_time.
# `url_pattern` is pasted into the SQL text, so only pass trusted constants.
fetch_avg_staytime <- function(conn, url_pattern) {
  sql <- paste0(
    "select insert_date, avg(stay_time) as avg_staytime from ",
    "(select left(insert_time,8) as insert_date, ",
    "case when stay_time >= 1800 then 1800 else stay_time end as stay_time ",
    "from tracker.hbase_visit where page_url like '", url_pattern, "') a ",
    "group by insert_date having insert_date is not null;"
  )
  # dbGetQuery() sends the statement, fetches every row and clears the result
  # set, so no result handle is left open on the connection.
  dbGetQuery(conn, sql)
}

# Page views (pv), bounces and bounce rate (bounce / pv) per insert_date for
# pages whose URL matches `url_pattern` (a SQL LIKE pattern). A visit counts
# as a bounce when both stay_time and leave_time are 0.
# `url_pattern` is pasted into the SQL text, so only pass trusted constants.
fetch_bounce_stats <- function(conn, url_pattern) {
  sql <- paste0(
    "select insert_date, pv, bounce, (bounce/pv) as bounceRate from ",
    "(select left(insert_time,8) as insert_date, count(*) as pv, ",
    "sum(case when stay_time = 0 and leave_time = 0 then 1 else 0 end) ",
    "as bounce ",
    "from tracker.hbase_visit where page_url like '", url_pattern, "' ",
    "group by left(insert_time,8)) a where insert_date is not null;"
  )
  dbGetQuery(conn, sql)
}

home_url_pattern <- "%www.zhong.com%"
detail_url_pattern <- "%item.zhong.com/item/detail.do?productId%"

conn <- dbConnect(
  MySQL(),
  dbname = "pms",
  username = db_user,
  password = db_password,
  host = "10.10.109.62",
  port = 1333
)
# `finally` guarantees the connection is closed even when a query fails.
invisible(tryCatch(
  {
    result_homepage <- fetch_avg_staytime(conn, home_url_pattern)
    result_detailpage <- fetch_avg_staytime(conn, detail_url_pattern)
    result_homejumppage <- fetch_bounce_stats(conn, home_url_pattern)
    result_detailjumppage <- fetch_bounce_stats(conn, detail_url_pattern)
  },
  finally = dbDisconnect(conn)
))
head(result_homepage)
# Build the date-range prefix shared by every plot title: a fixed start
# ("08月18") up to yesterday's month and day, e.g. "08月18至09月01日".
# Yesterday is taken as "now minus 24 hours", converted to a calendar date.
yesterday <- as.Date(as.POSIXlt(Sys.time() - 24 * 60 * 60))
title <- paste0(
  "08月18至",
  format(yesterday, "%m"), "月",
  format(yesterday, "%d"), "日"
)
# Daily average stay time on product-detail pages: one coloured point per
# date, joined by a single black line.
#
# `pd` was used below without being defined anywhere in this script, which
# fails with "object 'pd' not found". Define it only if the session does not
# already provide one.
# NOTE(review): a dodge width of 0.1 is an assumption - confirm the intended
# value.
if (!exists("pd")) {
  pd <- position_dodge(width = 0.1)
}
p_detailtime <- ggplot(
  result_detailpage,
  aes(x = insert_date, y = avg_staytime, colour = insert_date)
) +
  geom_line(colour = "black", position = pd, aes(group = 1)) +
  geom_point(size = 4, position = pd) +
  xlab("日期") +
  ylab("停留时间")
# theme(legend.position = "none") removes the per-date legend on the right.
p_detailtime +
  ggtitle(paste0(title, "访问者每天每详情页停留时间")) +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  theme(axis.text.x = element_text(angle = 90, color = "black")) +
  theme(legend.position = "none")
# dev.off()
# Home page bounce rate per day: one coloured point per date, joined by a
# single black line.
#
# `pd` was used below without being defined anywhere in this script, which
# fails with "object 'pd' not found". Define it only if the session does not
# already provide one.
# NOTE(review): a dodge width of 0.1 is an assumption - confirm the intended
# value.
if (!exists("pd")) {
  pd <- position_dodge(width = 0.1)
}
p_homejumppage <- ggplot(
  result_homejumppage,
  aes(x = insert_date, y = bounceRate, colour = insert_date)
) +
  geom_line(colour = "black", position = pd, aes(group = 1)) +
  geom_point(size = 4, position = pd) +
  xlab("日期") +
  # The y axis shows bounceRate; the old label "停留时间" (stay time) was a
  # copy-paste leftover from the stay-time plot.
  ylab("跳出率")
p_homejumppage +
  ggtitle(paste0(title, "访问者每天首页跳出率")) +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  theme(axis.text.x = element_text(angle = 30, color = "black"))
# dev.off()
# Home page: daily bounce count compared with total page views.
#
# Reshape to long format so pv and bounce become two series keyed by `group`.
# Argument names are spelled out in full (the original relied on partial
# matching of `id` / `measure`), and the columns are picked by name instead of
# by position. `value.name` is a reshape2 argument that reshape::melt() does
# not use; the value column is called "value" by default, which is what the
# plot below maps to y.
result_homejumppage_melt <- melt(
  result_homejumppage,
  id.vars = "insert_date",
  measure.vars = c("pv", "bounce"),
  variable_name = "group"
)
# Sort by date so the rows of each series are in chronological order.
result_homejumppage_melt <- result_homejumppage_melt[
  order(result_homejumppage_melt$insert_date),
]
p_home_melt <- ggplot(
  result_homejumppage_melt,
  aes(x = insert_date, y = value, colour = group)
) +
  geom_point(size = 4) +
  geom_line(aes(group = group))
# NOTE(review): the title mentions "uv" and bounce *rate*, but the plotted
# series are pv and bounce *count* - confirm the intended wording.
p_home_melt +
  xlab("日期") +
  ylab("pv") +
  ggtitle(paste0(title, "首页每日uv和跳出率对比图")) +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("pv", "bounce")) +
  theme(axis.text.x = element_text(angle = 30, color = "black"))
# dev.off()
# Product-detail page bounce rate per day: one coloured point per date,
# joined by a single black line.
#
# `pd` was used below without being defined anywhere in this script, which
# fails with "object 'pd' not found". Define it only if the session does not
# already provide one.
# NOTE(review): a dodge width of 0.1 is an assumption - confirm the intended
# value.
if (!exists("pd")) {
  pd <- position_dodge(width = 0.1)
}
p_detailjumppage <- ggplot(
  result_detailjumppage,
  aes(x = insert_date, y = bounceRate, colour = insert_date)
) +
  geom_line(colour = "black", position = pd, aes(group = 1)) +
  geom_point(size = 4, position = pd) +
  xlab("日期") +
  # The y axis shows bounceRate; the old label "停留时间" (stay time) was a
  # copy-paste leftover from the stay-time plot.
  ylab("跳出率")
p_detailjumppage +
  ggtitle(paste0(title, "每日详情页跳出率分析")) +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  theme(axis.text.x = element_text(angle = 30, color = "black"))
# dev.off()
# Product-detail page: daily bounce count compared with total page views.
#
# Reshape to long format so pv and bounce become two series keyed by `group`.
# Argument names are spelled out in full (the original relied on partial
# matching of `id` / `measure`), and the columns are picked by name instead of
# by position. `value.name` is a reshape2 argument that reshape::melt() does
# not use; the value column is called "value" by default.
result_detailjumppage_melt <- melt(
  result_detailjumppage,
  id.vars = "insert_date",
  measure.vars = c("pv", "bounce"),
  variable_name = "group"
)
# Sort by date so the rows of each series are in chronological order.
result_detailjumppage_melt <- result_detailjumppage_melt[
  order(result_detailjumppage_melt$insert_date),
]
# Plot the detail-page data. The original passed result_homejumppage_melt
# here, so this chart silently repeated the home page figures.
p_detail_melt <- ggplot(
  result_detailjumppage_melt,
  aes(x = insert_date, y = value, colour = group)
) +
  geom_point(size = 4) +
  geom_line(aes(group = group))
# Every `+` sits at the end of a line. The original had a trailing `+`
# followed by a line starting with `+`, i.e. a unary plus applied to the
# scale object, which ggplot2 rejects with an error.
# NOTE(review): the title mentions "uv" and bounce *rate*, but the plotted
# series are pv and bounce *count* - confirm the intended wording.
p_detail_melt +
  xlab("日期") +
  ylab("pv") +
  ggtitle(paste0(title, "详情页每日uv和跳出率对比图")) +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("pv", "bounce")) +
  theme(axis.text.x = element_text(angle = 30, color = "black"))
# + geom_text(aes(label=value))
# dev.off()