197.上升温度(普通)-4
-
自行题解:通过
-
# Write your MySQL query statement below select w2.id as Id from Weather w1,Weather w2 where w2.recordDate = date_add(w1.recordDate,interval 1 day) and w2.Temperature > w1.Temperature;
-
解题思路
- 表自连接,实现两张表分别表示昨天和今天
- w1表示昨天,w2表示今天
- 今天是昨天+1,datetype不能直接加,要使用date_add或date_sub反向减
- and 今天比昨天的气温高
-
-
官方题解
-
SELECT weather.id AS 'Id' FROM weather JOIN weather w ON DATEDIFF(weather.date, w.date) = 1 AND weather.Temperature > w.Temperature ;
-
解题思路,使用了datediff
-
DATEDIFF()返回以天数表示的从一个日期到另一个日期的值exp1 - exp2。Expr1和expr2是日期或日期和时间表达式。只有值的日期部分在计算中使用。
-
使用datediff 时候要注意, 昨天向今天数是正向的,今天向昨天数是逆向的
-
-
其他方法
-
select Id from ( select temperature, recordDate, lead(id,1) over (order by recordDate) as 'Id', lead(recordDate,1) over (order by recordDate) as 'nextDate', lead(temperature,1) over (order by recordDate) as 'nextTemp' from weather )t where nextTemp > temperature and DATEDIFF(nextDate, recordDate) = 1
-
lag超过为null, lead 超过为0
-
就是一那一列偏移,偏移多少,默认从哪开始,以哪列排序
-
178.分数排名(普通)-5
- 排名是数据库问题经典题目,可划分为三种场景
- 连续排名 1-2-3-4 不同薪水不同名
- 同薪,同名,排名不连续,1-2-2-4
- 同薪,同名,排名连续 ,1-2-2-3
Idea1:window函数
-
同分不同名,排名连续 row_number( ) over
-
同分同名,排名不连续 rank( ) over
-
同分同名,排名连续 dence_rank( ) over
-
select Score, dense_rankrank() over w as 'Rank' from Scores window w as (order by Score)
Idea2: join & group by & order by & count
-
select s1.Score , count(distinct s2.Score) as "Rank" from Scores s1 join Scores s2 on (s1.Score<=s2.Score) group by s1.Id order by s1.Score desc -- order by 2 -- 这里order by可以写2,表示根据count(distinct s2.Score) -- 也可以填写Rank 因为投影的第二行 as 为Rank的
180.连续出现的数字(普通)-6
- 重点是连续
Idea1:case when
-
select distinct Num as ConsecutiveNums from ( select Num, case when @prev = Num then @count := @count + 1 when (@prev := Num) is not null then @count := 1 end as CNT from Logs, (select @prev := null,@count := null) as t ) as temp where temp.CNT >= 3
-
解题思路
-
不需要使用自连接也不用自关联,效率更高,不受id是否连续限制,可以设置任意次数
-
when的顺序不能替换
-
使用自定义变量要注意, = 是比较运算符, := 是赋值运算符
Idea2:in
-
# Write your MySQL query statement below select distinct Num as ConsecutiveNums from Logs where (Id+1,Num) in (select * from Logs) and (Id+2,Num) in (select * from Logs)
-
需要三行连续,以id 为基准,对比id+1,num对比id+2,num这两行,判断是不是有这样的数据
Idea3:connection
-
SELECT * FROM Logs l1, Logs l2, Logs l3 WHERE l1.Id = l2.Id - 1 AND l2.Id = l3.Id - 1 AND l1.Num = l2.Num AND l2.Num = l3.Num;
-
和上面那方法类似
Idea4:row_number( )
-
SELECT DISTINCT Num ConsecutiveNums FROM( SELECT *, ROW_NUMBER() OVER (PARTITION BY Num ORDER BY Id) rownum FROM LOGS ) t GROUP BY (Id-rownum) HAVING COUNT(*)>=3
-
row_number 同分不同名,排序连续
-
partition by 按照num分组,按照id排序
-
partition by 排序后每一组都有自己的位序
-
id- rownum的意思
-
通过id减去rownum,经过分组的having 的count统计 >= 3 就得出连续
-
这种方法局限性比较高
181.超过经理收入的员工(简单)-7
-
select e1.name as employee from employee e1 ,employee e2 where e1.managerid = e2.id and e1.salary > e2.salary
-
select e1.name as employee from employee e1 left join employee e2 on e1.managerid = e2.id where e1.salary > e2.salary -- 比第一种方法快20毫秒 -- left join 又比 join 快 70毫秒
182.查找重复的电子邮件(简单)-8
-
# Write your MySQL query statement below select email from person group by email having count(id) >1 -- count(email) 也是可以的
-
select distinct p1.email from person p1 join person p2 on p1.email = p2.email and p1.id != p2.id -- 这种使用on 代替了where
183.从不订购的客户(简单)-9
-
select name as Customers from customers where id not in ( select customers.id from customers join orders on orders.customerId = customers.id ) -- 自己使用子查询是比较慢的 330ms SELECT Name 'Customers' FROM Customers WHERE Id NOT IN( SELECT CustomerId FROM Orders ) -- 这是最简化的
184.部门工资最高的员工(普通)-10
-
Idea1:window partition by order by where
-
select tmp_tbl.d_name as department, tmp_tbl.e_name as employee, salary from ( select dense_rank() over w as s_rank, employee.salary as salary, department.name as d_name, employee.name as e_name from employee join department on department.id = employee.departmentid window w as (partition by department.id order by employee.salary desc) ) tmp_tbl where tmp_tbl.s_rank =1 -- 自己解法 runtime:412ms -- 这种方法的好处就是 部门的第几名都可以,有一定健壮性
Idea2:where in clause select
-
select d.Name as Department, e.Name as Employee, e.Salary from Employee e,Department d where e.DepartmentId=d.id and (e.Salary,e.DepartmentId) in (select max(Salary),DepartmentId from Employee group by DepartmentId); -- runtime:412ms -- 有一定局限性,只能针对这题 -- 优秀之处就是限定了两个字段的 in -- 主要思想,分组后max求出单一最大值得出id获取其他信息
-
185.部门工资前三高的所有员工(困难)-11
-
# Write your MySQL query statement belowselect tmp_tbl.d_name as department, tmp_tbl.e_name as employee, salaryfrom ( select dense_rank() over w as s_rank, employee.salary as salary, department.name as d_name, employee.name as e_name from employee join department on department.id = employee.departmentid window w as (partition by department.id order by employee.salary desc) ) tmp_tblwhere tmp_tbl.s_rank <= 3 -- 太爽了上一题的方法直接使用上了-- runtime : 502 -- 估计是只用到了两个表的缘故,效率会比其他快一点
-
select d.name Department, a.name Employee, a.salary Salary from employee a left join employee b on a.departmentId = b.departmentId and a.salary < b.salary left join department d on a.departmentId = d.idwhere a.departmentId = d.idGROUP BY a.id HAVING COUNT(distinct b.salary) <=2-- 这种想法也比较精妙,通过left join 的on 来实现了分组,对分数进行比较相当于在一个select中写了第二次的groupb by -- 这里用了三个表,runtime : 583-- 主要思想就是employee 自连接 得出部门内的排名,取前三SELECT d.Name AS 'Department', e1.Name AS 'Employee', e1.SalaryFROM Employee e1 JOIN Department d ON e1.DepartmentId = d.IdWHERE 3 > (SELECT COUNT(DISTINCT e2.Salary) FROM Employee e2 WHERE e2.Salary > e1.Salary AND e1.DepartmentId = e2.DepartmentId );-- runtime: 868ms-- 求排名最标准的子查询写法
196.删除重复的电子邮箱(简单)-12
-
delete p2 from person p1 , person p2 where p1.email = p2.email and p1.id < p2.id
-
DELETE from Person Where Id not in ( select Id from( Select MIN(Id) as Id From Person Group by Email ) t)-- 下面那一句是会报错的-- You can't specify target table 'Person' for update in FROM clause-- 你不能在FROM子句中指定目标表'Person'进行更新-- 要先将查询出的id缓存起来,再根据此id删除,因为mysql不能先将select 出表中的某些值,再更新这个表-- 大概意思,就是查询出select表是随时变化的,要先确定出一个已经不会变的表之后再进行使用DELETE from Person Where Id not in ( Select MIN(Id) as Id From Person Group by Email )
262.行程和用户(困难)-13
-
-- 分析问题 -- 主要就是计算取消率 -- 司机或乘客的非禁止的订单数 / 非禁止用户生成的订单总数(用户包括乘客或司机,订单总数包括完成订单和乘客取消的订单和司机取消的订单) -- 而这些都是限制在时间区间下的 -- 司机或乘客的非禁止的订单数 -- client or driver -- banned = NO -- cancelled_by_client or cancelled_by_driver -- 总数 -- client or driver -- banned = no -- cancelled_by_client or cancelled_by_driver or completed -- 这两个只差的是 completed -- 反选就能得到前面两个条件,或者用or将两个条件连接 -- round(avg(Status='cancelled_by_driver'or Status = "cancelled_by_client"), 2) -- 然后使用round 进行 select Request_at 'Day', round(avg(Status!='completed'), 2) 'Cancellation Rate' FROM Trips t JOIN Users u1 ON (t.Client_id = u1.Users_id AND u1.Banned = 'No') JOIN Users u2 ON (t.Driver_id = u2.Users_id AND u2.Banned = 'No') WHERE Request_at BETWEEN '2013-10-01' AND '2013-10-03' GROUP BY Request_at;
-
# Write your MySQL query statement below select a.Request_at Day,round(sum(case when a.Status='completed' then 0 else 1 end)/count(*),2) 'Cancellation Rate' from Trips a inner join (select Users_Id from Users where role ='client' and Banned = 'No') b on a.Client_Id = b.Users_Id inner join (select Users_Id from Users where role ='driver' and Banned = 'No') c on a.Driver_Id = c.Users_Id where a.Request_at >= '2013-10-01' and a.Request_at<='2013-10-03' group by a.Request_at -- 这是执行速度最快的方法 -- 主要特点在使用子查询来建立临时表,但是临时表很小, 只包含一个字段,join on 的时候还加了条件 -- 再一个就是使用了sum / count(*) 来代替 avg -- sum 中又是通过 case when 得出 0 或 1 来判断筛选出元素 -- 然而自己实测并没有达到第一名的速度 真要优化还得看深入理解mysql
626.换座位(普通)-14
Idea1:case mod
-
#主要打成的条件单数位置+1,双数位置-1,最有一位如果是奇数,不用改变 select ( case when mod(seat.id,2) != 0 and count_table.id_count != id then id+1 when mod(seat.id,2) != 0 and count_table.id_count = id then id else id-1 end ) as id, seat.student from (select count(1)as id_count from seat) as count_table, seat order by id asc -- MOD(N,M), N % M, N MOD M -- Modulo operation. Returns the remainder of N divided by M. -- 取模操作,返回N除以M的余数 -- 单数,不是最后一个数, 是非最后一个数的单数id, id+1 -- 单数,是最后一个数,是最后一个数为单数id,不改变,id ,else 否则只剩余非最后一个数为双数的id,id-1操作
Idea2:case mod last_value()
select ( case when mod(id,2) = 1 and last_value(id) over w != id then id+1 when mod(id,2) = 1 and last_value(id )over w = id then id else id-1 end )as id, studentfrom seatwindow w as (order by id asc RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED following)order by id asc
627.变更性别(简单)-15
Idea1:update case
-
update salary set sex = ( case when sex = 'm' then 'f' else 'm' end )
Idea2:update if
-
update salary set sex = if(sex = 'm','f','m')
Idea3:update char ASCII
-
update salary set sex = char(ascii('m') + ascii('f') - ascii (sex));
511.游戏玩法分析1(简单)-16
idea1: distinct window partition by order by
-
# Write your MySQL query statement below select distinct player_id, first_value(event_date) over w as first_login from activity window w as (partition by player_id order by event_date asc)
Idea2:group by min()
-
select player_id, min(event_date) as first_login from activity group by 1
512.游戏玩法分析2(简单)-17
Idea1:window partition by order by first_value() distinct
-
# Write your MySQL query statement below select distinct player_id, first_value(device_id) over w as device_id from activity window w as (partition by player_id order by event_date asc ) #还有使用rank排序的速度是第一名,愿意暂时不知道
Idea2: Subqueries IN
-
#这种方法避开了order by 不能直接叠加筛选条件筛选的问题,筛选出以player_id分类后最早的日期,in用于对比已经筛选好的记录,得出device_id select player_id, device_id from activity where (player_id,event_date) in ( select player_id, min(event_date) from activity group by player_id )
534.游戏玩法分析3(普通)-18
Idea1:window partition order SUM
-
# Write your MySQL query statement below select player_id, event_date, sum(games_played) over w as games_played_so_far from activity window w as ( partition by player_id order by event_date asc range between unbounded preceding and current row ) -- 不写range 也能通过 -- 因为sum 的结果从现有的记录计算得出, -- 第一次次可见的数值只有5 -- 第二次可见的的数值包括第一行的5和第二行的6,此般累加
Idea2:case variable
-
# Write your MySQL query statement below -- 确实快了,但是快不了100毫秒,差距不大 -- 主要思想,使用变量叠加,代替sum SELECT player_id, event_date, CASE WHEN (@prevPlayer = player_id) THEN @totalSum := @totalSum + games_played WHEN (@prevPlayer := player_id) is not null THEN @totalSum := games_played END AS games_played_so_far FROM (SELECT * FROM Activity ORDER BY player_id, event_date) as t1, (SELECT @prevPlayer := null, @totalSum := null) as t2 -- 把order by 写在外面居然会慢200ms,有点意思
550.游戏玩法分析4(普通)-19
Idea1: datediff() round() avg(is not null)
-
-- 按照id分组,分组后选出首次登陆 -- 再得出第二天登陆,第二天登陆个数求平均 -- 使用了left join ,保证第一天是有登录的 select round(avg(a.event_date is not null),2) as fraction from ( select player_id, min(event_date) as first_day from activity group by player_id ) as tbl_temp left join activity a on ( a.player_id = tbl_temp.player_id and datediff(a.event_date,tbl_temp.first_day) = 1 )
Idea2: in date(min()+1)
-
select round(count(player_id) / total_player_id ,2) as fraction from activity, ( select count(distinct player_id) as total_player_id from activity ) as tbl_tmp where (player_id,event_date) in ( select player_id, date(min(event_date)+1) from activity group by player_id ) -- 筛选出第二天登陆的个数,除总数