工作中,遇到这样一个需求:有一张产品状态表,记录每个产品的停用或启用状态以及状态发生变化的时间,需要统计每个月有多少在启用产品。
首先,根据状态变化的时间生成每个产品每个状态对应的开始时间和结束时间。
select product_id,status_desc,status_time,row_number() over(partition by product_id order by status_time) as rk
from dw.tableName
where status_desc = '已启用'
)t1
left join(
select product_id,status_desc,status_time,row_number() over(partition by product_id order by status_time) as rk
from dw.tableName
where status_desc = '已停用'
)t2
on t1.product_id=t2.product_id and t1.rk=t2.rk
where t2.product_id is not null
再根据产品启用和停用的时间间隔生成每月状态信息。
select product_id,start_status,substr(date_add(concat(start_date,'-','01'),idx),1,7) as ds
from(
select
t1.product_id,
t1.status_desc start_status,
t1.status_time start_date,
t2.status_desc end_status,
t2.status_time end_date,
int(months_between(concat(t2.status_time,'-','01'),concat(t1.status_time,'-','01'))) num_months
from(
select product_id,status_desc,status_time,row_number() over(partition by product_id order by status_time) as rk
from dw.tableName
where status_desc = '已启用'
)t1
left join(
select product_id,status_desc,status_time,row_number() over(partition by product_id order by status_time) as rk
from dw.tableName
where status_desc = '已停用'
)t2
on t1.product_id=t2.product_id and t1.rk=t2.rk
where t2.product_id is not null
and months_between(concat(t2.status_time,'-','01'),concat(t1.status_time,'-','01')) > 0
order by t1.product_id,start_date
)tmp
lateral view
posexplode(split(repeat('m_',num_months),'_')) temp as idx,x