需求:
有用户账户余额表 test_data ,具体数据如下
create table test_data as
select 'u1' as user_id,'2023-01-01' as rdate,100 as xmoney union all
select 'u1' as user_id,'2023-02-01' as rdate,150 as xmoney union all
select 'u1' as user_id,'2023-02-10' as rdate,130 as xmoney union all
select 'u2' as user_id,'2023-01-01' as rdate,110 as xmoney union all
select 'u2' as user_id,'2023-01-11' as rdate,140 as xmoney union all
select 'u2' as user_id,'2023-02-01' as rdate,180 as xmoney ;
假设当前日期为2023年2月15日,给出每天用户的账户金额(补齐缺失日期的记录)
create table test_dim_date as
select '2023-01-01' as xdate union all
select '2023-01-02' as xdate union all
select '2023-01-03' as xdate union all
select '2023-01-04' as xdate union all
select '2023-01-05' as xdate union all
select '2023-01-06' as xdate union all
select '2023-01-07' as xdate union all
select '2023-01-08' as xdate union all
select '2023-01-09' as xdate union all
select '2023-01-10' as xdate union all
select '2023-01-11' as xdate union all
select '2023-01-12' as xdate union all
select '2023-01-13' as xdate union all
select '2023-01-14' as xdate union all
select '2023-01-15' as xdate union all
select '2023-01-16' as xdate union all
select '2023-01-17' as xdate union all
select '2023-01-18' as xdate union all
select '2023-01-19' as xdate union all
select '2023-01-20' as xdate union all
select '2023-01-21' as xdate union all
select '2023-01-22' as xdate union all
select '2023-01-23' as xdate union all
select '2023-01-24' as xdate union all
select '2023-01-25' as xdate union all
select '2023-01-26' as xdate union all
select '2023-01-27' as xdate union all
select '2023-01-28' as xdate union all
select '2023-01-29' as xdate union all
select '2023-01-30' as xdate union all
select '2023-01-31' as xdate union all
select '2023-02-01' as xdate union all
select '2023-02-02' as xdate union all
select '2023-02-03' as xdate union all
select '2023-02-04' as xdate union all
select '2023-02-05' as xdate union all
select '2023-02-06' as xdate union all
select '2023-02-07' as xdate union all
select '2023-02-08' as xdate union all
select '2023-02-09' as xdate union all
select '2023-02-10' as xdate union all
select '2023-02-11' as xdate union all
select '2023-02-12' as xdate union all
select '2023-02-13' as xdate union all
select '2023-02-14' as xdate union all
select '2023-02-15' as xdate ;
-- 常规写法(正确)
select
ta.xdate
,tb.rdate
,tb.user_id
,tb.xmoney
from test_dim_date ta,
(select * ,lead(rdate,1,'2099-12-31') over(partition by user_id order by rdate) as lastday from test_data) tb
where ta.xdate>=tb.rdate
and ta.xdate < tb.lastday
order by user_id,xdate
-- spark-sql 写法
select
xdate
,user_id
,xmoney
,last_value(xmoney) ignore nulls over (partition by user_id order by xdate) as last_value_xmoney
from
(
select
ta.xdate,tb.user_id,case when ta.xdate = tb.rdate then tb.xmoney end as xmoney
from test_dim_date ta,
(select * ,lead(rdate,1,'2099-12-31') over(partition by user_id order by rdate) as lastday from test_data) tb
where ta.xdate>=tb.rdate
and ta.xdate < tb.lastday
) tt
order by user_id,xdate
-- hive-sql写法
select
xdate
,user_id
,xmoney
,last_value(xmoney,true) over (partition by user_id order by xdate) as last_value_xmoney
from
(
select
ta.xdate,tb.user_id,case when ta.xdate = tb.rdate then tb.xmoney end as xmoney
from test_dim_date ta,
(select * ,lead(rdate,1,'2099-12-31') over(partition by user_id order by rdate) as lastday from test_data) tb
where ta.xdate>=tb.rdate
and ta.xdate < tb.lastday
) tt
order by user_id,xdate