表结构
--Drop
DROP TABLE IF EXISTS test_live_events;
--DDL
CREATE TABLE IF NOT EXISTS test_live_events
(
user_id INT COMMENT '用户id',
live_id INT COMMENT '直播id',
in_datetime STRING COMMENT '进入直播间时间',
out_datetime STRING COMMENT '离开直播间时间'
) COMMENT '直播间访问记录' STORED AS ORC TBLPROPERTIES ( "orc.compress" = "SNAPPY", "discover.partitions" = "false" );
--Insert items
INSERT OVERWRITE TABLE test_live_events VALUES
(100, 1, '2023-08-12 19:00:00', '2023-08-12 19:28:00'),
(100, 1, '2023-08-12 19:30:00', '2023-08-12 19:53:00'),
(100, 2, '2023-08-12 21:01:00', '2023-08-12 22:00:00'),
(101, 1, '2023-08-12 19:05:00', '2023-08-12 20:55:00'),
(101, 2, '2023-08-12 21:05:00', '2023-08-12 21:58:00'),
(102, 1, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(102, 2, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(102, 3, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(104, 1, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(104, 2, '2023-08-12 21:57:00', '2023-08-12 22:56:00'),
(105, 2, '2023-08-12 19:10:00', '2023-08-12 19:18:00'),
(106, 3, '2023-08-12 19:01:00', '2023-08-12 21:10:00');
直播间高峰人数
基本思路:
- 构建新明细:进直播间+1,出直播间-1
- 基于明细,计算当前在线人数
- 求每个直播间最大的在线人数
sql实现:
WITH tmp_live_status AS (
SELECT
user_id,
live_id,
in_datetime AS event_time,
1 AS status
FROM
test_live_events
UNION ALL
SELECT
user_id,
live_id,
out_datetime AS event_time,
-1 AS status
FROM
test_live_events
)
SELECT
live_id,
MAX(online_cnt) AS online_cnt
FROM (
SELECT
user_id,
live_id,
event_time,
status,
SUM(status) OVER (PARTITION BY live_id ORDER BY event_time) AS online_cnt
FROM
tmp_live_status
) a
GROUP BY
live_id
;
直播间高峰人数持续时间
基本思路:
- 首先定位到达到最高峰人数的开始时间
- 通过下移函数找到有人离开的时间
- 俩时间的差值就是持续时间
sql实现:
WITH tmp_live_status AS (
SELECT
user_id,
live_id,
in_datetime AS event_time,
1 AS status
FROM
test_live_events
UNION ALL
SELECT
user_id,
live_id,
out_datetime AS event_time,
-1 AS status
FROM
test_live_events
)
SELECT
live_id,
event_time,
event_end_time,
unix_timestamp(event_end_time) - unix_timestamp(event_time) AS seconds_difference,
max_online_cnt
FROM(
SELECT
live_id,
event_time,
lead ( event_time, 1, event_time ) OVER (PARTITION BY live_id ORDER BY event_time ) event_end_time,
online_cnt,
max_online_cnt
FROM(
SELECT
live_id,
event_time,
online_cnt,
max( online_cnt ) OVER (PARTITION BY live_id) AS max_online_cnt
FROM (
SELECT
user_id,
live_id,
event_time,
status,
SUM(status) OVER (PARTITION BY live_id ORDER BY event_time) AS online_cnt
FROM
tmp_live_status
) a
) b
)
where online_cnt=max_online_cnt
;