550:游戏玩法分析IV
SQL架构
-- Fixture for problem 550: one row per player per login day.
-- (player_id, event_date) is declared as the primary key, as the problem statement specifies,
-- so a player cannot have two rows for the same day.
create table if not exists Activity_550 (
    player_id    int  not null,
    device_id    int  not null,
    event_date   date not null,
    games_played int  not null,
    primary key (player_id, event_date)
);
-- Start from an empty table so the script can be re-run.
truncate table Activity_550;
-- Rows mirror the example Activity table from the problem statement
-- (3 players; only player 1 logs in again the day after the first login, expected fraction 0.33).
insert into Activity_550 (player_id, device_id, event_date, games_played) values
    (1, 2, '2016-03-01', 5),
    (1, 2, '2016-03-02', 6),
    (2, 3, '2017-06-25', 1),
    (3, 1, '2016-03-02', 0),
    (3, 4, '2018-07-03', 5);
Table: Activity
+--------------+---------+
| Column Name | Type |
+--------------+---------+
| player_id | int |
| device_id | int |
| event_date | date |
| games_played | int |
+--------------+---------+
(player_id,event_date)是此表的主键。
这张表显示了某些游戏的玩家的活动情况。
每一行是一个玩家的记录:他在某一天使用某个设备登录,并在注销之前玩了若干局游戏(可能是 0 局)。
编写一个 SQL 查询,报告在首次登录的第二天再次登录的玩家的比率,四舍五入到小数点后两位。即,您需要计算从首次登录日期开始至少连续两天登录的玩家的数量,然后除以玩家总数。
查询结果格式如下所示:
Activity table:
+-----------+-----------+------------+--------------+
| player_id | device_id | event_date | games_played |
+-----------+-----------+------------+--------------+
| 1 | 2 | 2016-03-01 | 5 |
| 1 | 2 | 2016-03-02 | 6 |
| 2 | 3 | 2017-06-25 | 1 |
| 3 | 1 | 2016-03-02 | 0 |
| 3 | 4 | 2018-07-03 | 5 |
+-----------+-----------+------------+--------------+
Result table:
+-----------+
| fraction |
+-----------+
| 0.33 |
+-----------+
只有 ID 为 1 的玩家在首次登录的第二天再次登录,所以答案是 1/3 = 0.33
解题
-- Step 1: first login date of every player.
select
    player_id,
    min(event_date) as first_date
from Activity_550
group by player_id;
-- Step 2: use the per-player first login dates as a derived table and join it back
-- to the full activity table, so every activity row carries that player's first_date.
-- Columns are listed explicitly (equivalent to a.*, b.*); b.player_id repeats a.player_id.
select
    a.player_id,
    a.device_id,
    a.event_date,
    a.games_played,
    b.player_id,
    b.first_date
from Activity_550 as a
inner join (
    select
        player_id,
        min(event_date) as first_date
    from Activity_550
    group by player_id
) as b
    on a.player_id = b.player_id;
-- Step 3: for every login, compute the number of days between it and the player's
-- first login with datediff, and fetch the total number of distinct players with
-- count(distinct ...). The scalar subquery is uncorrelated, so it repeats the same
-- value on every row.
select
    datediff(a.event_date, b.first_date) as days_since_first_login,
    (select count(distinct player_id) from Activity_550) as total_players
from Activity_550 as a
inner join (
    select
        player_id,
        min(event_date) as first_date
    from Activity_550
    group by player_id
) as b
    on a.player_id = b.player_id;
-- Step 4: flag the logins that happened exactly one day after the first login with
-- case when ... then 1 else 0, and sum the flags to count the qualifying players.
-- NOTE: this equals the number of players only if a player has at most one row per
-- day, which the problem statement guarantees via the (player_id, event_date) key.
select
    sum(case when datediff(a.event_date, b.first_date) = 1 then 1 else 0 end) as next_day_players,
    (select count(distinct player_id) from Activity_550) as total_players
from Activity_550 as a
inner join (
    select
        player_id,
        min(event_date) as first_date
    from Activity_550
    group by player_id
) as b
    on a.player_id = b.player_id;
-- Step 5 (final answer): divide the number of players who logged in again the day
-- after their first login by the total number of distinct players, round to two
-- decimals, and name the column `fraction` as the expected output requires.
-- The numerator counts rows whose date is exactly first_date + 1 day; with at most
-- one row per player per day (per the problem statement) that is one per player.
select
    round(
        sum(case when datediff(a.event_date, b.first_date) = 1 then 1 else 0 end)
        / (select count(distinct player_id) from Activity_550),
        2
    ) as fraction
from Activity_550 as a
inner join (
    select
        player_id,
        min(event_date) as first_date
    from Activity_550
    group by player_id
) as b
    on a.player_id = b.player_id;