数据源文件:
#创建表:
DROP TABLE IF EXISTS `login`;
CREATE TABLE `login` (
`id` int(11) NOT NULL COMMENT '主键',
`user_id` int(11) NOT NULL COMMENT '用户ID',
`date` date DEFAULT NULL COMMENT '登录日期',
PRIMARY KEY (`id`)
)
#插入数据:
INSERT INTO login(id,user_id,date) VALUES('1','1','2020-11-12'),('2','1','2020-11-08'),('3','1','2020-11-10'),('4','1','2020-11-16'),('5','1','2020-11-13'),('6','1','2020-11-15'),('7','2','2020-11-11'),('8','2','2020-11-14'),('9','2','2020-11-08'),('10','2','2020-11-10'),('11','2','2020-11-15'),('12','2','2020-11-16'),('13','2','2020-11-09'),('14','3','2020-11-12'),('15','3','2020-11-15'),('16','3','2020-11-14'),('17','3','2020-11-08'),('18','3','2020-11-10'),('19','3','2020-11-13'),('20','3','2020-11-16'),('21','3','2020-11-09'),('22','4','2020-11-09'),('23','4','2020-11-15'),('24','4','2020-11-12'),('25','4','2020-11-08'),('26','4','2020-11-10'),('27','4','2020-11-11'),('28','5','2020-11-12'),('29','5','2020-11-10'),('30','5','2020-11-08'),('31','5','2020-11-09'),('32','5','2020-11-14'),('33','6','2020-11-14'),('34','6','2020-11-13'),('35','6','2020-11-16'),('36','6','2020-11-09'),('37','6','2020-11-10'),('38','6','2020-11-08'),('39','6','2020-11-12'),('40','6','2020-11-15'),('41','7','2020-11-08'),('42','7','2020-11-12'),('43','7','2020-11-10'),('44','7','2020-11-11'),('45','7','2020-11-16'),('46','7','2020-11-13'),('47','7','2020-11-09'),('48','7','2020-11-14'),('49','7','2020-11-15'),('50','8','2020-11-13'),('51','8','2020-11-12'),('52','8','2020-11-08'),('53','8','2020-11-11'),('54','8','2020-11-10'),('55','8','2020-11-14'),('56','8','2020-11-09'),('57','9','2020-11-14'),('58','9','2020-11-09'),('59','9','2020-11-13'),('60','9','2020-11-10'),('61','9','2020-11-15'),('62','9','2020-11-12'),('63','9','2020-11-11'),('64','10','2020-11-10'),('65','10','2020-11-09'),('66','10','2020-11-13'),('67','10','2020-11-14'),('68','10','2020-11-16'),('69','10','2020-11-08');
1.查询每个用户首次登录的日期,输出用户ID和首次登录日期
```bash
SELECT
user_id,
min(date) AS first_login_date
FROM
login
GROUP BY
user_id;
查询结果如下:
2.查询首次登录后,第二天仍登录的用户
SELECT
a.user_id,a.date
FROM
login a
JOIN (
SELECT
user_id,
DATE_ADD(min(date), INTERVAL 1 DAY) AS next_day
FROM
login
GROUP BY
user_id
) b ON a.user_id = b.user_id
AND a.date = b.next_day
3.统计次日留存率
统计方法:首次登录后,第二天仍登录的用户数占所有用户的比例
SELECT
round( rcnt / count(DISTINCT user_id),3) AS retention_rate
FROM
login
JOIN (
SELECT
count(*) AS rcnt
FROM
login a
JOIN (
SELECT
user_id,
DATE_ADD(min(date), INTERVAL 1 DAY) AS next_day
FROM
login
GROUP BY
user_id
) b ON a.user_id = b.user_id
AND a.date = b.next_day
) c
4. 统计每天新增的用户数(某天如果没有新增用户,则为0)
SELECT
date,
sum(CASE WHEN tag = 0 THEN 1 ELSE 0 END) AS new
FROM
(
SELECT
a.user_id,
a.date,
(
SELECT
count(*)
FROM
login b
WHERE
a.user_id = b.user_id
AND a.date > b.date
) AS tag
FROM
login a
) v
GROUP BY
date
ORDER BY
date
方法二:
SELECT
b.date,
ifnull(new,0) AS new_users
FROM
login b
LEFT JOIN (
SELECT
date,
count(DISTINCT user_id) AS new
FROM
(
SELECT
user_id,
min(date) AS date
FROM
login
GROUP BY
user_id
) a
GROUP BY
a.date
) c ON b.date = c.date
GROUP BY
b.date
5.统计每天新增用户的次日留存率
SELECT
firstlogin.date,
firstlogin.new_users,
secondlogin.retention_users,
case when firstlogin.new_users=0 then 0 else round(secondlogin.retention_users / firstlogin.new_users ,3) end as rate
FROM
(
SELECT
b.date,
ifnull(c.new_users, 0) AS new_users
FROM
login b
LEFT JOIN (
SELECT
date,
count(*) AS new_users
FROM
(
SELECT
user_id,
min(date) AS date
FROM
login
GROUP BY
user_id
) a
GROUP BY
date
) c ON b.date = c.date
GROUP BY
b.date
) firstlogin
LEFT JOIN (
SELECT
a.first_date,
count(a.next) AS retention_users
FROM
login l1
JOIN (
SELECT
user_id,
min(date) AS first_date,
DATE_ADD(min(date), INTERVAL 1 DAY) AS next
FROM
login
GROUP BY
user_id
) a ON l1.user_id = a.user_id
AND l1.date = a.next
GROUP BY
a.first_date
) secondlogin ON firstlogin.date = secondlogin.first_date