- 问题背景:
筛选出连续3个月出现的用户
- 核心思想:
同一个人,每月只有一条数据的情况下,连续3个月的月份,减去此人按月份的排序(rank2 列),得到的差(diff 列) 如果是连续的月,那么得到的差应该是相等的。
- 如图:
图中红框是经过筛选后,查到一个用户有5条数据, 图中蓝框所示的 差为 3个6 的三条数据,就是符合条件的三条数据,进一步进行排序(rank3 列),where >=3 就会得到连续出现3个月以上的用户。
- SQL
表结构很简单,工号,姓名,创建时间
--获取符合条件的基础数据
select * from
(
select cc.*,
--差排序
row_number() over(partition by personcode,diff order by [CreateTime] ) as rank3
from
(
select bb.*,
--月份和月份排序的差 ,连续3个一样的,就是连续3个月
(month-rank2) as diff from
(
select aa.*,
--单个人 月份排序
row_number() over(partition by personcode order by [CreateTime] ) as rank2
from
(
select temp.*,
--去重排序
row_number() over(partition by personcode,year,month order by [CreateTime] desc) as rank1
from
(
select * from
(
select
personcode,
UniqueId,
[CreateTime],
DATEPART(year,createTime) as year,
DATEPART(month,createTime) as month
from [AllData] where createTime>'2017-1-1' and createTime<'2018-12-31'
)as base
) as temp
) as aa
where aa.rank1=1
) as bb
)as cc
) as dd order by personcode,year,month --where rank3>=3