0- 描述
描述:时间序列–取最新完成状态的前一个状态
表名:t21
表字段及内容:
date_id a b
2014 1 A
2015 1 B
2016 1 A
2017 1 B
2013 2 A
2014 2 B
2015 2 A
2014 3 A
2015 3 A
2016 3 B
2017 3 A
上表中B为完成状态。
1- 问题一
描述:取最新完成状态的前一个状态
输出结果如下所示:
date_id a b
2016 1 A
2013 2 A
2015 3 A
参考答案:
select
next_date_id as date_id
,a
,next_b as b
from(
select
*,min(nk) over(partition by a,b) as minb
from(
select
*,row_number() over(partition by a order by date_id desc) nk
,lead(date_id) over(partition by a order by date_id desc) next_date_id
,lead(b) over(partition by a order by date_id desc) next_b
from(
select * from t21
) t
) t
) t
where minb = nk and b = 'B';
2- 问题二
描述:如何将完成状态的过程合并
输出结果如下所示:
a b_merge
1 A、B、A、B
2 A、B
3 A、A、B
参考答案:
select
a
,collect_list(b) as b
from(
select
*
,min(if(b = 'B',nk,null)) over(partition by a) as minb
from(
select
*,row_number() over(partition by a order by date_id desc) nk
from(
select * from t21
) t
) t
) t
where nk >= minb
group by a;
总结:相关窗口函数的考察,lead()over()