'''
a表记录了点击的流水信息,包括用户id ,和点击时间
usr_id a a b a a a a
click_time t1 t2 t3 t4 t5 t6 t7
'''
-- 方式一:
use demo;
WITH t1 AS (
SELECT MemberID AS user_id, STime AS click_time
FROM OrderList
WHERE MemberID IS NOT NULL
/*选取demo.OrderList 作为底表测试数据*/
AND DATE_FORMAT(STime, '%Y-%m') = '2017-02'
),
t2 AS (
SELECT *
, row_number() OVER (ORDER BY click_time) AS rank1
, row_number() OVER (PARTITION BY user_id ORDER BY click_time) AS rank2
FROM t1
),
t3 AS (
SELECT *, rank1 - rank2 AS diff
FROM t2
),
t4 AS (
SELECT DISTINCT user_id
FROM t3
GROUP BY user_id, diff
HAVING COUNT(1) > 3
)
-- SELECT * from t4 ;
SELECT *
FROM t3
WHERE user_id IN (
SELECT user_id
FROM t4
)
ORDER BY user_id, diff, click_time;
-- 方式二:
SELECT DISTINCT user_id
FROM (
SELECT *, rank_1 - rank_2 AS diff
FROM (
SELECT *
,row_number() OVER (ORDER BY click_time) AS rank_1
,row_number() OVER (PARTITION BY user_id ORDER BY click_time) AS rank_2
FROM (
SELECT MemberID AS user_id, STime AS click_time
FROM OrderList
WHERE MemberID IS NOT NULL
/*选取demo.OrderList 作为底表测试数据*/
AND DATE_FORMAT(STime, '%Y-%m') = '2017-02'
) a
) b
) c
GROUP BY diff, user_id
HAVING COUNT(1) > 3;
/*
总结:相邻问题的本质就是基于研究对象(比如用户、会员、员工等),利用窗口函数对时间字段进行有差别的排序,然后基于研究对象和新增的{排序差值列},
进行分组计数的求连续点击、签到、复购等业务问题的计算;
*/
【sql面试题】求连续点击三次的用户数,而且中间不能有别人的点击
于 2022-07-13 17:03:48 首次发布