目前,我可以收集所有活跃的用户:
with aux as(
select date
,collect_set(user_id) over(
partition by feature
order by cast(timestamp(date) as float)
range between (-90*60*60*24) following and 0 preceding
) as user_id
,feature
--
from (
select data
,feature
,collect_set(user_id)
--
from table
--
group by date, feature
)
)
--
select date
,distinct_array(flatten(user_id))
,feature
--
from aux
问题是,现在我只需要保留过去90天以上的用户
select date
,collect_set(case when user_created_at < date - interval 90 day
then user_id end) over(
partition by feature
order by cast(timestamp(date) as float)
range between (-90*60*60*24) following and 0 preceding
) as teste
,feature
from table
它不起作用的原因是collect_select()中的过滤器只过滤一天内的用户,而不是过滤过去90天内的所有用户,
我怎样才能正确地得到它?
作为参考,我使用此查询来验证是否正确:
select
count(distinct user_id) as total
,count(distinct case when user_created_at < date('2020-04-30') - interval 90 day then user_id end)
,count(distinct case when user_created_at >= date('2020-04-30') - interval 90 day then user_id end)
--
from table
--
where 1=1
and date >= date('2020-04-30') - interval 90 day
and date <= '2020-04-30'
and feature = 'a_feature'