电商类网站的各种模块指标统计以及分析

以下这些都是针对于电商网站的指标来分析业务的:以下这些指标都是使用 SQL 语句分析得出的结果,首先先创建一张表,然后将数据导入表中,字段根据分析的业务的需要而定,这里我的选择是我已经存在的表进行查询的,下面将给出所有查询的 SQL 语句

模块分析:
select * from web_log.weblog_etl where dateStr="20180709" limit 5;


1. 统计每天每小时的 PV 数
select day,hour,count(request) as pvs from web_log.weblog_detail where dateStr="20180709" group by day,hour

2. 统计每天的 pv 数
select dayStr,day,count(request) as pvs from web_log.weblog_detail where dateStr="20180709" group by dayStr,day

3. 统计每月的 pv 数
select dayStr,month,count(request) as pvs from web_log.weblog_detail where dateStr="20180709" group by dayStr,month

4. 统计不同终端维度的 PV 数
select dayStr,user_agent,count(request) as pvs from web_log.weblog_detail where dateStr="20180709" group by dayStr,user_agent

5. 统计不同终端类型
select user_agent,count(user_agent) as iphone from web_log.weblog_detail where dateStr="20180709" group by user_agent

6. 统计每个月每个终端的 pv 数
select dayStr,month,day,hour,user_agent,count(request) as pvs from web_log.weblog_detail where dateStr="20180709" group by dayStr,month,day,hour,user_agent

7. 统计人均 pv 数
select dayStr,count(request)/count(distinct ip) from web_log.weblog_detail where dateStr="20180709" group by dayStr

8. 统计不同来源的 pv 数

select
*
from
(
select t.ref_host,t.dayStr,t.hour,t.pv,row_number() over (partition by t.ref_host order by t.pv) as rnk
from 
(
select
ref_host,dayStr,hour,count(request) as pv
from
web_log.weblog_detail
where
dateStr="20180709"
group by
ref_host,dayStr,hour
)t
)n
where n.rnk <4
---------------
select
*
from
(
select t.ref_host,t.pv,row_number() over (partition by t.ref_host order by t.pv) as rnk
from 
(
select
ref_host,count(request) as pv
from
web_log.weblog_detail
where
dateStr="20180709"
group by
ref_host
)t
)n
where n.rnk <4

9. 受访分析
9.1 统计每日热门的受访页面的 topN
9.1.1 先统计每日每个网页访问次数
  select dayStr,request,count(request) as counts from weblog_detail
where dateStr="20180709" group by dayStr,request

(2)row_number
   select dayStr,request,count(request) as pv,row_number() over (partition by request order by request) from weblog_detail
where dateStr="20180709" group by dayStr,request

10. 访客分析
10.1 每个访客每小时的访问数 (session 个数)
select
ip,substring(s_time,12,2) as hour,count(distinct sessionId) as number
from web_log.weblog_pv
where
dateStr="20180709"
group by ip,substring(s_time,12,2)

10.2 每个小时的 ip 访问数
select day,hour,count(ip) from web_log.weblog_detail where dateStr="20180709" group by day,hour


10.3 每天的 / 每月的 ip 访问数
select month,day,count(ip) from web_log.weblog_detail where dateStr="20180709" group by month,day

10.4 统计新增访客 --- 需要维护 历史访客表

11.visit 模型分析
11.1 单次访客统计(sessionId 只有一个)
select
substring(s_time,9,2),ip,count(distinct sessionId) as number
from
web_log.weblog_pv
where
dateStr="20180709"
group by
substring(s_time,9,2),ip
having number=1

11.2 回头访客个数(sessionId >1)
select
substring(s_time,9,2),ip,count(distinct sessionId) as number
from
web_log.weblog_pv
where
dateStr="20180709"
group by
substring(s_time,9,2),ip
having number>1

11.3 人均访问频率 (一天访问多少次) 总 session 个数 / 总人数
select
substring(s_time,9,2) as day,count(distinct sessionId)/count(distinct ip) as number
from
web_log.weblog_pv
where
dateStr="20180709"
group by 
substring(s_time,9,2)

11.4 用户平均访问页数
select sum(pageNum)/count(distinct ip) from web_log.weblog_visit where dayStr="20180709"

12pageview 模型分析
12.1 每个用户的访问时长
select
ip,sum(stayTime) as alltime
from
web_log.weblog_pv
where dateStr="20180709"
group by ip
12.2 用户的平均访问时长
select sum(stayTime)/count(distinct ip) as avg_time
from
web_log.weblog_pv
where
dateStr="20180709"

12.3 访问页面的漏斗

create table tb_num1 as 
select "step1" as  step, count(*) as number from web_log.weblog_pv where dateStr="20180709" and request  like "%/hadoop-mahout%"  

union all 
select "step2" as  step, count(*) as number from web_log.weblog_pv where dateStr="20180709" and request  like "%/hadoop-hive%"   

union all  
select "step3" as  step, count(*) as number from web_log.weblog_pv where dateStr="20180709" and request  like "%/hadoop-zookeeper%"  

union all  
select "step4" as  step,count(*) as number from web_log.weblog_pv where dateStr="20180709" and request  like "%/black-ip-list%"  


create table tb_num as 
select "step1" as  step, count(*) as number from web_log.weblog_pv where dayStr="20180709" and request  like "%/hadoop-mahout%"
union all   
select "step2" as  step, count(*) as number from web_log.weblog_pv where dayStr="20180709" and request  like "%/hadoop-hive%" 
union all  
select "step3" as  step, count(*) as number from web_log.weblog_pv where dayStr="20180709" and request  like "%/hadoop-zookeeper%"  
union all  
select "step4" as  step,count(*) as number from web_log.weblog_pv where dayStr="20180709" and request  like "%/black-ip-list%"


select
   *
from
   tb_num
   
----- 全链接    笛卡尔积
select
   *
from
   (select tb_num.step as ak,tb_num.number as av from tb_num) a,
   (select tb_num.step as bk,tb_num.number as bv from tb_num) b

----- 漏斗

select
 t.ak,t.av,t.bk,t.bv,t.av/t.bv as retain_rate
from
(
select
   *
from
   (select tb_num.step as ak,tb_num.number as av from tb_num) a,
   (select tb_num.step as bk,tb_num.number as bv from tb_num) b
) t
where
   t.ak='step1' and t.bk='step1' or
   t.ak='step2' and t.bk='step1' or
   t.ak='step3' and t.bk='step2' or
   t.ak='step4' and t.bk='step3'

 实例:不使用 hive 中的函数  

公司有多个部门,统计出各个部门中 按照工资排序 前 3 的。(9 条)
      name        deptNo         sal    
      1-1        1            9000
      1-2        1            6000
      1-3        1            7000
      2-1        2            8000
      3-2        3            7600
      2-3        2            8800
      2-5        2            8800
      3-3        3            5500
      3-4        1            5000

SQL 语句:

select deptno, name, sal      
from emp e1     
where      
   (  
    select count(1)     
    from emp e2     
    where e2.deptno=e1.deptno and e2.sal>=e1.sal  
   ) <=3 /* 这里的数值表示你想取前几名 */  
order by deptno, sal desc; 
 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值