1、求用户连续登录最大天数
step1 构造伪列,对每个用户的日期排序得到字段rn(假设每个用户每日只有一条记录),再用当前日期减去rn得到新的date_tmp,若客户连续登录,那么将有若干个相同的date_tmp;
step2 基于上一步结果,统计每个用户每种date_tmp下的数量,然后在对客户id groupby 计算每个客户的最大连续登录天数
--step1 构造伪列,对每个用户的日期排序得到字段rn(假设每个用户每日只有一条记录),再用当前日期减去rn得到新的date_tmp,若客户连续登录,那么将有若干个相同的date_tmp
create table tmp_table stored as parquet as
select
tmp.id,
tmp.date,
data_sub(tmp.date,tmp.rn) as date_tmp
from
(
select
id,
date,
row_number() over(partition by id order by data) rn
from dm.table
)tmp
--step2 基于上一步结果,统计每个用户每种date_tmp下的数量,然后在对客户id groupby 计算每个客户的最大连续登录天数
select
id,
max(date_cnt) as cnt
from
(
select
id,
date_tmp,
count(1) as date_cnt
from tmp_table
group by id,date_tmp
)t
group by id
2、求连续点击三次的用户数,中间不能有别人的点击 ,最大连续天数的变形问题
3、计算除去部门最高工资,和最低工资的平均工资
分别按升序和降序排出部门的最大和最小薪资,将二者筛选后剔除,在计算均值
--emp 表
--id 员工 id ,deptno 部门编号,salary 工资
select
deptno,avg(salary)
from
(
select
id,
deptno,
salary,
rank() over(partition by deptno order by salary asc) as rank_low,
rank() over(partition by deptno order by salary desc) as rank_high
from table
)tmp
where tmp.rank_low>1 and rank_high>1
group by deptno
4、留存率计算
--假设每个客户每天只登录一次
--step 1 自联结 step 2 按日期group by得到每天的1、3、7日留存率
select
tmp.load_tm_a,
count(if(diff_days = 1,id,null)) as '次日留存人数',
count(if(diff_days = 1,id,null))/count(id) as '次日留存人数',
count(if(diff_days = 3,id,null)) as '3日留存人数',
count(if(diff_days = 3,id,null))/count(id) as '3日留存人数',
count(if(diff_days = 7,id,null)) as '7日留存人数',
count(if(diff_days = 7,id,null))/count(id) as '7日留存人数',
from
(
select
id,
load_tm_a,
load_tm_b,
DATEDIFF(day,'load_tm_b','load_tm_a') as diff_days
from table
left join table
on a.id = b.id
)tmp
group by load_tm_a