再来道面试题

题目

取出每月连续 5 天有登录记录的用户。

解题方法

create table member_log 
as
with detail as (
              select '4/11/2020' as date_d ,  'A' as member_code ,  21 as sale_amt
    union all select '4/12/2020' as date_d ,  'A' as member_code ,  23 as sale_amt
    union all select '4/13/2020' as date_d ,  'A' as member_code ,  45 as sale_amt
    union all select '4/14/2020' as date_d ,  'A' as member_code ,  43 as sale_amt
    union all select '4/15/2020' as date_d ,  'A' as member_code ,  12 as sale_amt
    union all select '4/16/2020' as date_d ,  'A' as member_code ,  34 as sale_amt
    union all select '4/17/2020' as date_d ,  'A' as member_code ,  65 as sale_amt
    union all select '4/18/2020' as date_d ,  'A' as member_code ,  54 as sale_amt
    union all select '4/19/2020' as date_d ,  'A' as member_code ,  43 as sale_amt
    union all select '4/20/2020' as date_d ,  'A' as member_code ,  23 as sale_amt
    union all select '4/21/2020' as date_d ,  'A' as member_code ,  43 as sale_amt
    union all select '4/22/2020' as date_d ,  'A' as member_code ,  78 as sale_amt
    
    union all select '4/11/2020' as date_d ,  'B' as member_code ,  21 as sale_amt
    union all select '4/12/2020' as date_d ,  'B' as member_code ,  23 as sale_amt
    union all select '4/13/2020' as date_d ,  'B' as member_code ,  45 as sale_amt
), dim_date as (
              select '4/11/2020' as date_d
    union all select '4/12/2020' as date_d
    union all select '4/13/2020' as date_d
    union all select '4/14/2020' as date_d
    union all select '4/15/2020' as date_d
    union all select '4/16/2020' as date_d
    union all select '4/17/2020' as date_d
    union all select '4/18/2020' as date_d
    union all select '4/19/2020' as date_d
    union all select '4/20/2020' as date_d
    union all select '4/21/2020' as date_d
    union all select '4/22/2020' as date_d
), dim_member as (
    select 'A' as member_code  
    union all select 'B' as member_code  
), dim_all as (
    select a.date_d
         ,b.member_code
    from dim_date as a 
    cross join dim_member as b 
)
select a.member_code , concat_ws('',collect_list(if(b.date_d is null , '0' , '1'))) as co
from dim_all as a 
left join detail as b 
on a.date_d = b.date_d
and a.member_code = b.member_code 
group by a.member_code

我的思路是使用 collect_list 将行上的数据转到列上。得到表 member_log,它的数据如下:

member_code登录情况
A111111111111
A111000000000

最后使用这则表达式,匹配到连续 5 次,1{5,100},所以最终的 sql 如下所示:

select member_code ,co
from member_log 
where length(regexp_extract(co , '(1{5,100})' , 1)) > 5

还可以优化一下,以月为单位,用 1 表示上线,用 0 表示没有下线。

每当新来一天的数据,就加到 co 字段的后面。

总结

这里用到了行转列和正则表达式。关键式正则表达式使用比较新颖,将正则表达式中的连续性应用到这个题目中。这种做法省去了 row_number 的做法,效率也会搞很多。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值