-- 二刷:
-- 550:方法4最优
-- 569:设置起始位置和步长;两个方法都看一下
-- 570:用连接查询做一下
-- 577:优化方法
-- 578:排序方式不错,节省了一步操作
-- 579:超级难!
-- 580:方法2中理解一个操作'select count(null);',该结果为0
-- 584:方法3神逻辑
-- 595:优化方式
512.游戏玩法分析 II
# 方法1 用于扩宽思路(都可以用变量做 但是简单题不见得更好)
SELECT a.player_id, a.device_id
FROM (
SELECT a.player_id, a.device_id
, CASE
WHEN @preCol = player_id THEN @rank := @rank + 1
ELSE @rank := 1
END AS rk, @preCol := player_id
FROM Activity a, (
SELECT @preCol := NULL, @rank := 0
) s
ORDER BY player_id, event_date
) a
WHERE a.rk = 1
# 方法2 效果更佳, 注意where中两个限定条件并不是我们要query的列(可以这么用)
select player_id,device_id
from activity
where
(player_id,event_date) in
(select player_id,min(event_date) from activity group by player_id)
# 方法3 自己的方法
select a.player_id, a.device_id from activity a join
(select player_id, min(event_date) first_date
from activity
group by player_id) t
on a.player_id=t.player_id and a.event_date=t.first_date;
534.游戏玩法分析 III
# 方法1 自己的方法 0.93
SELECT
player_id, event_date, games_played_so_far FROM
(SELECT case
when @now_id=player_id then 1
when @now_id:=player_id then @sum:=0 end,
player_id, event_date,cast(@sum:=@sum+games_played as decimal(10,0)) games_played_so_far
FROM
Activity a, (select @sum:=0, @now_id:=null) t1
order by player_id,event_date) t2;
# 方法2: 0.96(方法改进 其实可以用一个函数就搞定的)
select player_id, event_date, cast(case
when @para=player_id then @count:=@count+games_played
when @para:=player_id then @count:=games_played
end as signed) as games_played_so_far
from activity p, (select @count:=0,@para:=null) q order by player_id, event_date;
# 提示 方法2中的from语句 还有别的写法
# 但是我没明白有什么优点
...from (select * from Activity order by player_id,event_date) p, (select @count:=0,@para:=null) q;
550.游戏玩法分析 IV
这个题有一点:玩家连续两天以上玩游戏 和 连续两天玩游戏其实对结果没有什么影响!
# 方法一: 0.70
# 在最外层子查询中建立临时表(ID,今天日期,昨天日期,该玩家的信息数目)
# 尤其注意第四列,并不是对玩家按照连续排名(那样太麻烦)
SELECT round(COUNT(CASE
WHEN temp.rk = 2
AND datediff(temp.event_date, temp.preDate) = 1 THEN 1
ELSE NULL
END) / COUNT(DISTINCT temp.player_id), 2) AS fraction
FROM (
SELECT a.player_id, a.event_date
, CASE
WHEN @preCol = player_id THEN @preDate
ELSE @preDate := NULL
END AS preDate
, CASE
WHEN @preCol = player_id THEN @rank := @rank + 1
ELSE @rank := 1
END AS rk, @preDate := event_date, @preCol := player_id
FROM Activity a, (
SELECT @preCol := NULL, @preDate := NULL
, @rank := 0
) s
ORDER BY a.player_id, a.event_date
) temp
# 方法2: 注意题中要求的是"首次登陆的第二天.." 思路新颖 0.5399
SELECT round(COUNT(a.event_date) / COUNT(*), 2) AS fraction
FROM Activity a
RIGHT JOIN (
SELECT player_id, MIN(event_date) AS event_date
FROM Activity
GROUP BY player_id
) b
ON datediff(a.event_date, b.event_date) = 1
AND a.player_id = b.player_id
# 方法3: 对方法2改进一点 0.8089 注意差别
# 如果是统计行数一定要使用count(*)--阿里巴巴手册; 使用inner分组查询时,尽量不使用join,而是用where去限定.
SELECT ROUND(COUNT(t.player_id) / (SELECT COUNT(DISTINCT player_id) FROM Activity),2) AS fraction
FROM (
SELECT DISTINCT player_id, MIN(event_date) AS first_date
FROM Activity
GROUP BY player_id
) AS t, Activity a
WHERE t.player_id=a.player_id AND DATEDIFF(a.event_date,t.first_date)=1
# 方法4: 0.887 最优 case when datediff 求解
先求出每个人员最小登陆时间
使用datediff求出后续登陆时间的差值
使用case when将datediff差值为1的变为1,其余差值变为0
对case when进行求和,即可求出连续登陆1天的人数
再和总人数相除,取2位有效数字
SELECT round(sum(case when datediff(a.event_date,b.first_date)=1 then 1 else 0 end)/(select count(distinct(player_id)) from activity),2) as fraction
FROM activity a,
(select player_id,min(event_date) first_date from activity group by player_id) b
WHERE a.player_id=b.player_id
569.员工薪水中位数
知识点:
ceil() 向上取整
floor() 向下取整
round() 四舍五入
# 本题关键 是得到中位数的 区间范围(起始点位置和步长)!
# 方法1 亮点在通过if函数排名 0.90
SELECT A.Id,A.Company,A.Salary
FROM
(
SELECT
id,
company,
salary,
@trank:=IF(@pre_company=company,@trank+1,1) AS `trank`,
@pre_company:=company
FROM employee,(SELECT @pre_company:= NULL,@trank:=0) AS T
ORDER BY company,salary
) AS A
JOIN
(
SELECT company, CEIL(COUNT(*)/2) AS `beg`, if(COUNT(*)%2=1,0,1) AS `cnt`
FROM employee
GROUP BY company
) AS B
ON (A.company=B.company AND A.trank BETWEEN B.beg AND B.beg+B.cnt)
# 方法2: 0.9528
# 如果只是想单纯的给某个变量赋值的话, 建议单独一列单独辅助, 不要借助when内部
# cnt其实是一个变量(是通过一列去限制另一列)
select b.Id,b.Company,b.Salary
FROM (
select id,company,salary,
case @com when company then @rk:=@rk+1 else @rk:=1 end rk,
@com:=company
from employee,(select @rk:=0, @com:=null) a
order by company,salary) b
LEFT JOIN
(select company,count(*)/2 cnt from employee group by company) c
ON b.company=c.company
WHERE b.rk in (cnt+0.5,cnt+1,cnt);
570.至少有5名直接下属的经理
# 自己的方法 0.6811
select Name from Employee where ID in
(select ManagerID from employee where ManagerID is not null group by ManagerID having count(*)>4);
# 官方答案 0.8039,这说明连接查询真的比子查询好~
# 除此之外 学会写代码的格式 美观
SELECT
Name
FROM
Employee AS t1 JOIN
(SELECT
ManagerId
FROM
Employee
GROUP BY ManagerId
HAVING COUNT(ManagerId) >= 5) AS t2
ON t1.Id = t2.ManagerId
571. 给定数字的频率查询中位数
# 首先是思路:转化为两个区间是否存在交集的问题.
# 其次既然是比较交集,那么两个交集更好比较一点!
# 自己使用between and 进行优化, 最后到0.9716
SELECT AVG(D.NUMBER) AS `median`
FROM
(
SELECT A.*,B.*,
if(
B.beg between (@fre_sum+1) AND (@fre_sum + A.Frequency), 1,
if((B.beg+B.cnt) between (@fre_sum+1) AND (@fre_sum + A.Frequency),1,0)
) AS wanted,
@fre_sum:=@fre_sum+A.Frequency AS fre
FROM
numbers AS A,
(
SELECT
CEIL(SUM(N.frequency)/2) AS `beg`,
IF(SUM(N.frequency)%2=1,0,1) AS `cnt`
FROM numbers AS N
) AS B,
(SELECT @fre_sum:=0) AS C
ORDER BY A.number
) AS D
WHERE wanted = 1;
574.当选者
# 都是自己的方法~
# 方法1: 0.91
select Name
FROM
Candidate
WHERE id=
(select CandidateID from vote
group by CandidateID order by count(*) desc limit 1);
# 方法2: 使用连接查询代替子查询优化 0.94
select c.Name
FROM
Candidate c,
(select CandidateID from vote
group by CandidateID order by count(*) desc limit 1) t
WHERE c. id=t.CandidateID;
577. 员工奖金
# 方法1 0.90(不稳定)
SELECT name,bonus
FROM
employee e
LEFT JOIN Bonus b
ON e.empid=b.empid
WHERE
bonus < 1000 or bonus is null;
# 方法2 优化 0.91
SELECT name,bonus
FROM
employee e
LEFT JOIN Bonus b
ON e.empid=b.empid
WHERE
bonus < 1000
UNION ALL
SELECT name,bonus
FROM
employee e
LEFT JOIN Bonus b
ON e.empid=b.empid
WHERE
bonus is null;
578. 查询回答率最高的问题
# 这个题有点问题(数据)
# 自己的第一种方法 0.8496 有点麻烦
SELECT DISTINCT s2.question_id as survey_log
FROM
survey_log s2,
(SELECT question_id, count(answer_id)/count(if(action="show",1,null)) possibilty
FROM
survey_log s1
GROUP BY question_id
ORDER BY possibilty desc limit 1) t
WHERE s2.question_id=t.question_id;
# 方法2 常规方法 0.8922
SELECT
question_id AS 'survey_log'
FROM
survey_log
GROUP BY question_id
ORDER BY COUNT(answer_id) / SUM(IF(action = 'show', 1, 0)) DESC
LIMIT 1;
# 方法3 用于扩宽思路 (可能不对...)
# 其实本题中答题率的分母是固定的(当然也不一定),那么我们比较分子的次数就好啦!
SELECT question_id survey_log FROM survey_log
WHERE answer_id IS NOT NULL
GROUP BY question_id
ORDER BY COUNT(answer_id) DESC
LIMIT 1
579.查询员工的累计薪水–感觉超级难
# 自己的方法 有问题
# 如果某个用户中有五个月的数据, 这个demo是用不了的!
# 按照题的意思:如果某个用户有五个月数据,排除最近的一个月还剩4个月. 假如 还有1,2,3,4月
# 那么4月应该是计算的2,3,4的和
# 该方法存在的问题: 4月份是计算的是1,2,3,4的和
SELECT t4.ID,t4.Month,t4.sum_sal Salary
FROM
(SELECT t3.*, cast((case when @pre_id=t3.id then @sum_sal:=@sum_sal+t3.salary
when @pre_id:=t3.id then @sum_sal:=t3.salary end) as decimal(10,0)) sum_sal
FROM
(select t1.*
FROM
Employee t1
JOIN
(select Id, max(month) month from employee group by Id) t2
ON t1.Id=t2.id
WHERE t1.month<t2.month) t3, (select @pre_id:=null, @sum_sal:=0) t4
ORDER BY t3.id, t3.Month) t4
ORDER BY ID,Month desc;
# 方法2: 100%
# 先把每个月和前两个月的累计和计算出来,然后最后删除最大月
SELECT A.Id,A.MONTH,A.salary
FROM
(
SELECT
E.Id,
E.MONTH,
IF(E.Id != @pre_id,
E.Salary
,
E.Salary + @pre_salary1 + @pre_salary2
) AS `Salary`,
if(E.Id != @pre_id,
@pre_salary2:=0
,
@pre_salary2:=@pre_salary1
) AS `pre2`,
(@pre_salary1:=E.Salary) AS `pre1`,
@pre_id:=E.Id
FROM employee AS E,
(SELECT @pre_id:= NULL,@pre_salary1:=0,@pre_salary2:=0) AS T
ORDER BY E.Id,E.MONTH
) AS A
JOIN
(
SELECT E.Id, MAX(E.MONTH) AS `max_month`
FROM employee AS E
GROUP BY E.Id
) AS B
ON (A.Id=B.Id AND A.MONTH < B.max_month)
order by A.id,A.month desc
580. 统计各专业学生人数
# 方法1:不加order by排序会报错 0.8396
SELECT dept_name, ifnull(number,0) as student_number
FROM department d
LEFT JOIN
(SELECT dept_id, count(*) number
FROM student
GROUP BY dept_id) t
ON d.dept_id=t.dept_id
ORDER BY student_number desc, dept_name;
# 方法2: 0.9525(没有必要在第一个子查询中将人数算出来)
# count(右表的列名 会自动记录非null的行数)
SELECT d.dept_name , count(s.dept_id) 'student_number'
FROM
department d
LEFT JOIN student s
ON s.dept_id=d.dept_id
GROUP BY d.dept_id
ORDER BY student_number desc,dept_name;
584. 寻找用户推荐人–一道很简单的题目 考虑优化
# 方法1 0.5871
SELECT name
FROM
customer
WHERE referee_id is null
UNION ALL
SELECT name
FROM
customer
WHERE referee_id not in(2)
# 方法2:0.75 也不好
SELECT name
FROM
(SELECT name,if(referee_id is null or referee_id not in(2), 1, 0) result
FROM customer) t
WHERE result=1;
# 方法3:0.9824 逻辑简直无敌
SELECT name FROM customer
WHERE
ifnull(referee_id,0)!=2;
585. 2016年的投资
# 自己的第一次答案: 0.9724
# 在包含所有正确的答案的集合中 去掉错误的答案(完美逻辑)
SELECT SUM(TIV_2016) TIV_2016
FROM
insurance
WHERE pid in
(SELECT pid
FROM insurance
GROUP BY lat,lon
HAVING count(*)=1)
AND pid not in
(SELECT pid
FROM insurance
GROUP BY tiv_2015
HAVING count(*)=1)
595.大的国家
尽量使用 union 代替 or查询 ,查询优化, or会使索引失效,导致全文扫描.
select name,population,area from World where area > 3000000
union
select name,population,area from World where population > 25000000;
596.超过5名学生的课
方法一: 常规
select class from courses
group by class having count(distinct student) >=5
方法二: 还不完善,还没有解决重修学生的问题
select distinct class from(
select class,
case
when @prev = class then @count := @count + 1
when (@prev := class) is not null then @count := 1
end as num
from courses, (select @prev := null,@count := null) as t
order by class, student) t2
where num>=5;
要想解决这个问题 只能在最初的courses表中 首先去重, 所以就还要加一个嵌套子查询....性能就不好了