CREATE TABLE log_table1 (
id INT,
user_id INT,
date DATETIME,
question_id INT
);
INSERT INTO log_table1 (id, user_id, date, question_id) VALUES
(1, 1001, '2022-09-01 00:00:00', 1),
(2, 1001, '2022-09-02 00:00:00', 2),
(3, 1001, '2022-09-03 00:00:00', 3),
(4, 1002, '2022-09-04 00:00:00', 3),
(5, 1002, '2022-09-05 00:00:00', 8),
(6, 1002, '2022-09-19 00:00:00', 1),
(7, 1002, '2022-09-19 00:00:00', 4),
(8, 1002, '2022-09-20 00:00:00', 1),
(9, 1002, '2022-09-20 00:00:00', 5),
(10, 1003, '2022-09-19 00:00:00', 1),
(11, 1003, '2022-09-19 00:00:00', 2),
(12, 1003, '2022-09-19 00:00:00', 3),
(13, 1003, '2022-09-20 00:00:00', 3),
(14, 1003, '2022-09-20 00:00:00', 3),
(15, 1003, '2022-09-20 00:00:00', 3),
(16, 1003, '2022-09-21 00:00:00', 7),
(17, 1003, '2022-09-21 00:00:00', 8),
(18, 1003, '2022-09-22 00:00:00', 1),
(19, 1003, '2022-09-22 00:00:00', 9),
(20, 1003, '2022-09-23 00:00:00', 10),
(21, 1004, '2022-09-20 00:00:00', 2),
(22, 1004, '2022-09-21 00:00:00', 3),
(23, 1004, '2022-09-22 00:00:00', 4),
(24, 1004, '2022-09-23 00:00:00', 5);
如上表log_table,表存储的数据是用户的做题记录。用户没做过的题为新题,写SQL找出2022-09-19到2022-09-23期间内,每天都做过新题的用户ID。
id为索引值,question id是题目的编号,不同的题目id不同新题定义:该用户之前从没做过该题,对该用户来说,为新题。
SELECT user_id
FROM log_table1 lt
WHERE date BETWEEN '2022-09-19' AND '2022-09-23'
AND NOT EXISTS (
SELECT 1
FROM log_table1
WHERE user_id = lt.user_id
AND date < lt.date
AND question_id = lt.question_id
)
GROUP BY user_id
HAVING COUNT(DISTINCT date) = 5;
此查询首先在WHERE子句中筛选出日期在2022-09-19到2022-09-23之间的记录。然后,使用NOT EXISTS子查询排除每个用户已经在当天之前做过的题目。最后,通过GROUP BY用户ID分组,并通过HAVING子句筛选出每个用户在时间范围内的独立日期数量等于5(表示每天都出现)。
SELECT user_id
FROM (
SELECT
user_id,
date,
question_id,
COUNT(*) OVER (PARTITION BY user_id, question_id) AS cnt
FROM log_table
WHERE DATE(date) BETWEEN '2022-09-19' AND '2022-09-23'
) subquery
WHERE cnt = 1
GROUP BY user_id
HAVING COUNT(DISTINCT DATE(date)) = 5;