字节大数据sql原题(五种解法)

显示高出勤期间的id和visitor_cnt。高出勤期间定义为三个及以上连续的id编号,访客人数超过100人 。按照id升序排序输出结果。

数据源

with data as (
    select  1 as id, 100 as visitor_cn union all
    select  2 as id, 10 as visitor_cn union all
    select  3 as id, 120 as visitor_cn union all
    select  4 as id, 110 as visitor_cn union all
    select  5 as id, 700 as visitor_cn union all
    select  6 as id, 100 as visitor_cn union all
    select  8 as id, 60 as visitor_cn union all
    select  9 as id, 160 as visitor_cn union all
    select  10 as id, 160 as visitor_cn union all
    select  11 as id, 160 as visitor_cn
)

思路

连续id,这个已经锚定了一个思路,打标分组,把连续的id分到同一组上

解法1

where限定条件过滤不合适的数据,row_number()直接排序,此时得到全局连续的rn,但原id过滤后就是不连续的,此时原id减去rn,就会得到一个个标记,标记相同就是同一组并连续的,不同则是不同一组且不连续的,开窗count()统计个数并过滤不符合要求的

select id, visitor_cn
from (
    select id, visitor_cn
         , count(1) over(partition by id - row_number() over (order by id)
             rows between unbounded preceding and unbounded following) cnt
    from data
    where visitor_cn > 100
     ) t
where cnt >= 3
order by id

解法2

lag()取上一条数据id,使用if()打标0、1,sum()累加分组汇总

select id, visitor_cn
from (
    select id, visitor_cn, count(1) over(partition by par rows between unbounded preceding and unbounded following) cnt
    from (
        select id, visitor_cn, sum(`if`(id - lag(id) over(order by id) = 1, 0, 1)) over(order by id) par
        from data
        where visitor_cn > 100
         ) t
     ) t1
where cnt >= 3

解法3

使用left join左外不等连接,取得连续一个id,开窗count(B.id)累计一个伪连续序列(0,1,2,3,3,4,5),A.id - count(B.id) over(),一个不连续序列(1,2,3,4,7,8,9)减去一个伪连续序列(0,1,2,3,3,4,5),求得分组

select id, visitor_cn
from (
          SELECT A.id, A.visitor_cn
                 ,count(1) over(partition by A.id - COUNT(B.id) OVER (ORDER BY A.id)
                     rows between unbounded preceding and unbounded following) AS cnt
          FROM data A
          LEFT JOIN data B ON A.id = B.id + 1
          WHERE A.visitor_cn > 100
         ) t
where cnt >= 3

解法4

使用子查询,lag()取前二位数并与id相减,过滤后构造array(i,i-1,i-2)数组并炸裂开去重得到连续id

select id, visitor_cn
from data
where id in (
        select distinct explode(`array`(id, id - 1, id - 2)) id
        from (
        select id, visitor_cn, lag(id, 2) over(order by id) rk
        from data
        where visitor_cn > 100
             ) t1
        where id - 2 = rk
    )
order by id

解法5

道理一样,不过使用count() over(range)取等于_3_ 的数据行,过滤后构造array(i,i-1,i-2)数组并炸裂开去重得到连续id

select id, visitor_cn
from data
where id in (
        select distinct explode(`array`(id, id - 1, id - 2)) id
        from (
        select id, visitor_cn
             , count(1) over(order by id range between 2 preceding and current row) cnt
        from data
        where visitor_cn > 100
             ) t1
        where cnt = 3
    )
order by id

这是我的几个解法,主要是这个打标分组的逻辑很重要,通过函数计算前后项从而打上标签

有新的解法欢迎下方留言~

sql代码附录

with data as (
    select  1 as id, 100 as visitor_cn union all
    select  2 as id, 100 as visitor_cn union all
    select  3 as id, 120 as visitor_cn union all
    select  4 as id, 110 as visitor_cn union all
    select  5 as id, 700 as visitor_cn union all
    select  6 as id, 100 as visitor_cn union all
    select  8 as id, 60 as visitor_cn union all
    select  9 as id, 160 as visitor_cn union all
    select  10 as id, 160 as visitor_cn union all
    select  11 as id, 160 as visitor_cn
)
--------------------------------------count()&row_number()-------------------------------------
-- select id, visitor_cn
-- from (
--     select id, visitor_cn
--          , count(1) over(partition by id - row_number() over (order by id)
--              rows between unbounded preceding and unbounded following) cnt
--     from data
--     where visitor_cn > 100
--      ) t
-- where cnt >= 3
-- order by id

------------------------------------------sum()&lag()------------------------------------------------
-- select id, visitor_cn
-- from (
--     select id, visitor_cn, count(1) over(partition by par rows between unbounded preceding and unbounded following) cnt
--     from (
--         select id, visitor_cn, sum(`if`(id - lag(id) over(order by id) = 1, 0, 1)) over(order by id) par
--         from data
--         where visitor_cn > 100
--          ) t
--      ) t1
-- where cnt >= 3

--------------------------------------count()&LEFT JOIN------------------------------------------------------
-- select id, visitor_cn
-- from (
--           SELECT A.id, A.visitor_cn
--                  ,count(1) over(partition by A.id - COUNT(B.id) OVER (ORDER BY A.id)
--                      rows between unbounded preceding and unbounded following) AS cnt
--           FROM data A
--           LEFT JOIN data B ON A.id = B.id + 1
--           WHERE A.visitor_cn >= 100
--          ) t
-- where cnt >= 3

-----------------------------------in () & array()$explode()-----------------------------------------------------
-- select id, visitor_cn
-- from data
-- where id in (
--         select distinct explode(`array`(id, id - 1, id - 2)) id
--         from (
--         select id, visitor_cn, lag(id, 2) over(order by id) rk
--         from data
--         where visitor_cn > 100
--              ) t1
--         where id - 2 = rk
--     )
-- order by id

---------------------------- explode() & array() & count() over(range)----------------------------------------------------
-- select id, visitor_cn
-- from data
-- where id in (
--         select distinct explode(`array`(id, id - 1, id - 2)) id
--         from (
--         select id, visitor_cn
--              , count(1) over(order by id range between 2 preceding and current row) cnt
--         from data
--         where visitor_cn > 100
--              ) t1
--         where cnt = 3
--     )
-- order by id
  • 4
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值