SQL163 每篇文章同一时刻最大在看人数
select
artical_id,max(cnt) as max_uv
from
(
select
artical_id,
sum(num) over (partition by artical_id order by dt,num desc ) as cnt
from
(
select artical_id,in_time as dt,1 as num from tb_user_log where artical_id != 0
union all
select artical_id,out_time as dt, -1 as num from tb_user_log where artical_id != 0
)t1
)t2
group by artical_id
order by max_uv desc;
- 套路:
①将用户的进入时间单独拎出来,同时记为1;离开时间单独拎出来,同时记为-1,这样就聚合这两个表,按照时间排序,意思就是:进去一个加1,离开一个减1。
②然后利用窗口函数对计数(1或者-1)求累计和,因为题目规定:同一时间有就有出的话先算进来的后算出去的,所以排序的时候就要看好了先按时间排序,再按计数排序!
③然后再在每个分组里面去求最大的累积和就是最多同时在线的人数了!
SQL164 每天新用户的次日留存率
with a as (
select
t3.uid,
t3.first_date,
t3.login_date,
datediff(t3.login_date,t3.first_date) as remain_date --登录时间距离注册日期的天数
from (
select distinct t2.uid -- 去重防止同一用户多次登陆的情况,
t2.first_date,
date(out_time) as login_date --登陆时间
from tb_user_log t1
left join
(select uid, min(date(in_time)) as first_date -- 用户访问的最小时间
from tb_user_log group by uid) t2
on t1.uid = t2.uid
where substr(date(in_time),1,7) = '2021-11' and substr(first_date,1,7) = '2021-11'
)t3
)select
a.first_date dt, round(sum(if(remain_date = 1,1,0)) / count(distinct a.uid) ,2) as uv_left_rate
from a
group by dt
order by dt;
SQL166 每天的日活数及新用户占比
-- 如果(login_date)dt=first_date 那这天就是用户首次登录成为新用户的日子
WITH a AS (
SELECT uid, DATE(in_time) dt, MIN(DATE(in_time)) OVER (PARTITION BY uid) AS first_date -- 用户第一次登陆日期
FROM tb_user_log
UNION
SELECT uid, DATE(out_time) dt, MIN(DATE(in_time)) OVER (PARTITION BY uid) AS first_date
FROM tb_user_log
)
SELECT uid,
dt,
IF(first_date = dt, 1, 0)
FROM a
;
新用户判断:第一次登陆时间 = 当天时间。即为当天的新增用户
如果(login_date)dt = first_date 那这天就是用户首次登录成为新用户
-- in_time-进入时间和out_time-离开时间跨天了,在两天里都记为该用户活跃过。
-- 例如:108,2021-11-01,2021-11-02 在1号和2号都记为活跃
WITH a AS (
SELECT uid, DATE(in_time) dt, MIN(DATE(in_time)) OVER (PARTITION BY uid) AS first_date -- 用户第一次登陆日期
FROM tb_user_log
UNION
SELECT uid, DATE(out_time) dt, MIN(DATE(in_time)) OVER (PARTITION BY uid) AS first_date
FROM tb_user_log
)
SELECT dt,
COUNT(1) AS dau, -- 每天活跃人数
ROUND(SUM(IF(first_date = dt, 1, 0)) / COUNT(1), 2) AS uv_new_ratio -- first_date=dt 新增人员判断依据
FROM a
GROUP BY dt ORDER BY dt
;
SQL29 计算用户的平均次日留存率
- 解1:
with a as (
select
distinct t1.device_id,t1.date d1,t2.date d2
from question_practice_detail t1
left join (
select device_id,date from question_practice_detail
)t2
on t1.device_id = t2.device_id and datediff(t1.date,t2.date)=1
)select count(a.d2)/count(a.d1) as avg_ret from a;
- 解2:
select avg(if(datediff(d2,d1)=1 , 1 , 0))as avg_ret
from (
select distinct device_id,
date as d1,
lead(date) over (partition by device_id order by date) as d2
from(
select distinct device_id,date from question_practice_detail
)t1
)t2;