LEAD,LAG,ROW_NUMBER, CONCAT_WS,COLLECT_LIST 等SQL函数的练习

LEAD,LAG,ROW_NUMBER, CONCAT_WS,COLLECT_LIST 等SQL函数的练习,如图:

SQL题目描述

用impala写,可能和其他的语法有所出入

对于下面表有题目:

CREATE TABLE sqlexe1(
    user_id string
    ,rstatus bigint
    ,rdate string
);

INSERT INTO sqlexe1 VALUES 
('1001',0,'20211214'),
('1001',0,'20211215'),
('1001',1,'20211216'),
('1001',1,'20211217'),
('1001',0,'20211218'),
('1001',1,'20211219'),
('1001',0,'20211220'),
('1002',0,'20211213'),
('1002',1,'20211214'),
('1002',0,'20211215'),
('1002',1,'20211216'),
('1003',0,'20211214'),
('1003',1,'20211215'),
('1003',1,'20211216'),
('1003',1,'20211217');
  1. 连续登录三天的用户有哪些?
  2. 用户最新登录状态的前一天的日期和状态是什么?
  3. 最新一次登录及之前所有状态的统计是什么?

下面是我的笨方法:

<!-- 1. 连续登录三天的用户有哪些 -->
select user_id from
(    
    select
        user_id
        ,rdate
        ,lag(rdate,1) over(partition by user_id order by rdate) as rdate1
        ,lag(rdate,2) over(partition by user_id order by rdate) as rdate2
    from 
        business.sqlexe1
    where rstatus=1
    ) as a
    where cast(rdate2 as int) = cast(rdate as int)-2


<!-- 2. 用户最新登录状态的前一天的日期和状态是什么 -->
elect user_id,rstatus,n_date from
    (select * from business.sqlexe1
        left join (
        select user_id as uid,cast(cast(max(rdate) as int)-1 as string) as n_date
            from business.sqlexe1
            where rstatus=1 
            group by uid
            order by uid) as a
            on a.uid =sqlexe1.user_id) as b       
    where b.rdate = b.n_date   



<!-- 3. 用户最新登录状态的前一天的日期和状态是什么 -->
<!-- Impala 不支持 hive COLLECT_SET函数的方式,只能另作它法 -->
select user_id,GROUP_CONCAT(cast(rstatus as string)) as status_cnt from 
    (select * from business.sqlexe1
        left join (
            select user_id as uid,cast(cast(max(rdate) as int) as string) as n_date
                from business.sqlexe1
                where rstatus=1 
                group by uid
                order by uid) as a
                on a.uid =sqlexe1.user_id) as b
where rdate<=n_date
group by user_id
order by user_id

由于row_number可以用在一二两题中,比较经典,遂作尝试:

<!-- 1. 连续登录三天的用户有哪些 -->

SELECT user_id from
(SELECT * from (
        SELECT user_id,rdate,lag(rdate,2) OVER(PARTITION BY user_id ORDER BY rdate) as da FROM 
        (
            SELECT row_number() over(PARTITION BY user_id ORDER BY user_id) as num,
                user_id,rstatus,rdate FROM business.sqlexe1
            where rstatus = 1
        ) as a) as b
where b.da IS NOT NULL) as c
WHERE cast(da as int) = cast(rdate as int)-2


<!-- 2. 用户最新登录状态的前一天的日期和状态是什么(这个用的不好,太冗余了) -->

SELECT user_id,rstatus,rdate from(
    SELECT b.user_id,mm,c.rstatus,c.rdate from 
            (SELECT max(a.num)-1 as mm,a.user_id from
              (SELECT row_number() over(PARTITION BY user_id ORDER BY user_id) as num,user_id,rstatus,rdate 
                FROM business.sqlexe1) as a
                WHERE a.rstatus = 1
                GROUP BY a.user_id
                ORDER BY a.user_id) as b
                
                RIGHT JOIN 
                
                (SELECT row_number() over(PARTITION BY user_id ORDER BY user_id) as num,user_id,rstatus,rdate 
                    FROM business.sqlexe1) as c
                
                on b.user_id = c.user_id and b.mm = c.num
                ORDER BY b.user_id) as d
                where user_id IS NOT NULL
                ORDER BY user_id
                  

以下是mentor的答案,重点是关注年份与row_number之差相等的关系:

<!-- 1. 连续登录三天的用户有哪些 -->
SELECT
T2.user_id,
T3.rstatus,
T3.rdate
FROM(
SELECT
T1.user_id,
T1.rstatus,
T1.rdate,
ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY rdate desc) rn
FROM business.sqlexe1 T1
WHERE T1.rstatus = 1
) T2
LEFT JOIN business.sqlexe1 T3
ON T3.user_id = T2.user_id
AND CAST(T3.rdate AS BIGINT) = CAST(T2.rdate AS BIGINT) - 1
WHERE T2.rn = 1;

<!-- 2. 用户最新登录状态的前一天的日期和状态是什么 -->
SELECT 
T1.user_id
FROM(
SELECT
user_id,
rstatus,
rdate,
LEAD(rdate,2) OVER(PARTITION BY user_id ORDER BY rdate asc) rdate2
FROM business.sqlexe1
WHERE rstatus = 1
) as T1
HAVING CAST(T1.rdate2 AS BIGINT) - CAST(T1.rdate AS BIGINT)+1 = 3;

<!-- 3. 用户最新登录状态的前一天的日期和状态是什么 -->
SELECT
T2.user_id
FROM(
SELECT 
T1.user_id,
CAST(rdate AS BIGINT)-CAST(rn AS BIGINT) rdate2
FROM(
SELECT
user_id,
rstatus,
rdate,
ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY rdate ASC) rn
FROM business.sqlexe1
WHERE rstatus = 1  
) AS T1
) AS T2
GROUP BY user_id,rdate2
HAVING COUNT(1)=3;
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值