Leetcode SQL 解析(Day1)

1. 游戏玩法分析2

  • 解析

#方法一:嵌套查询
select player_id,device_id from Activity
where (player_id,event_date) in (
    select player_id,min(event_date)
    from Activity
    group by player_id
 )

#方法二:时间窗口查询
select player_id,device_id from 
(
    select 
        player_id,
        device_id,
        row_number() over(partition by player_id order by event_date) as rn
        #按照player_id分组,组内按照event_date排序,将组内每条数据重新排序编号为rn
    from Activity
) as a
where rn=1

ROW_NUMBER(): 求行数,结果为1,2,3,4

RANK(): 有间隔的分级,结果为1,2,2,4

DENSE_RANK(): 无间隔的分级,结果为1,2,2,3

2. 游戏玩法分析3

  • 解析

#方法一:自连接

#step1:自连接,得到图中整体的表(t1将设置为现在的时间戳,t2将为了找出比当前时间戳更早的记录)
select t1.player_id,t1.event_date
from Activity t1, Activity t2
where t1.player_id=t2.player_id

#step2:筛选日期在自身之前的记录,如图中灰色部分
t1.event_date>=t2.event_date

#step3:用group by 对两个主键进行分组并对目标字段进行sum(),如途中棕色虚线圈部分,综合代码为:
select player_id,
       event_date,
       sum(game_played)as game_played_so_far
from Activity t1, Activity t2
where t1.player_id=t2.player_id and t1.event_date>=t2.event_date #拼表+筛选
group by t1.player_id,t1.event_date #分组后用sum聚合
#方法二:时间窗口

select 
    player_id,
    event_date,
    sum(game_played)over(partition by player_id order by event_date)as game_played_so_far
    #按照player_id分组,组内按照event_date排序,将每条数据event_date之前的game_played累加
from Activity
group by t1.player_id,t1.event_date

sum(字段1) over(partition by 字段2 order by 字段3 rows between unbounded preceding and current row) as 新字段名

函数说明:

  • sum(tota_amount)的求和是针对后面over()窗口的求和,

  • over中partition by player_id order by event_date 针对player_id这一组按照event_date排序,

  • rows between unbounded preceding and current row 限定了行是按照在当前行不限定的往前处理(累加范围),通俗就是处理当前以及之前的所有行的sum,即3月时sum(amount)求的时1、2、3月的和,2月时sum(amount)求的是1、2月的和。unbounded意思无限的 preceding在之前的,current row当前行。

  • 这里可以改成:

SUM(Salary) OVER (PARTITION BY Id ORDER BY Month ROWS 2 PRECEDING)
#从当前月往前滚动2个月薪水累加(月份连续,如1,2月)
or
SUM(Salary) OVER (PARTITION BY Id ORDER BY Month range 2 PRECEDING)
#从当前月往前找到2个月薪水累加(月份不连续,如1,3月)

3. 游戏玩法分析4

  • 解析

# 方法一:用IN是否存在筛选第二天登录的记录

SELECT
    ROUND(COUNT(DISTINCT player_id)/(SELECT COUNT(distinct player_id) FROM Activity), 
    2) AS fraction #分母是个固定的常数,而分子要根据下面的条件过滤
FROM
    Activity
WHERE
    (player_id,event_date) 
    IN
    (SELECT player_id,Date(min(event_date)+1) 
    #得到每个玩家的次日登陆时间,看原数据中是否存在(存在说明第二天登录过,则被where筛选出来)
    FROM Activity
    Group by player_id) #给每个player_id都计算第二天登陆日期,为date()聚合
#方法二:JOIN+Avg()

SELECT ROUND (Avg(a.player_id is not null),2) as fraction #筛选的是a表,不符合on条件的为空
FROM Activity a right join #a:所有登陆记录
(
    SELECT player_id,min(event_date) as firstDate 
    FROM Activity
    Group by player_id
) b #b:把每个player_id第一天登录的时间拿出来

on a.player_id=b.player_id and datediff(a.event_date,b.firstDate)=1
#如果相差1天则有值,不等于1则为空,会被最开始筛选掉
#方法三:窗口函数

select round(count(distinct t.player_id)/(select count(distinct player_id) from Activity),2) fraction #在临时表中筛选后聚合
from
(
    select 
        a.player_id,
        first_value(a.event_date) over(partition by a.player_id  order by a.event_date)first_date,
        #按player_id进行分组,按event_date进行组内排序,取组内排序后第一个值
        lead(a.event_date,1)over(partition by a.player_id  order by a.event_date)next_date
        #使用LEAD()函数,可以从当前行访问下一行的数据或下一行之后的行
    from Activity a) t #建立临时表

where datediff(t.next_date,t.first_date) = 1 #在临时表中进行筛选

4. 员工薪水中位数

  • 解析:

#方法一:窗口函数用排序和总数判断
Select id,company,salary from(
    Select 
        id,company,salary,
        row_number() over (partition by company order by salary) rk,
        count(*) over(partition by company) cnt 
    from Employee e 
) t
where t.rk in (cnt/2,cnt/2+1,cnt/2+0.5)
# 如果是8个数字,中位数分别为第(4,5,4.5)个【偶数个:最中间的两个数字都是中位数】
# 如果是9个数字,中位数分别为第(4.5,5.5,5)个【奇数个:最中间的一个数字是中位数】
#方法二:把rank和count拼在一起后用where筛选判断
Select id,company,salary from
(
    Select 
        id,E1.company,salary,
        row_number() over(partition by company order by salary) rn,
        total
    from Employee E1 
    inner join
    (
        Select company,
        count(*) total
        from Employee 
        group by company
    )E2 on E1.company=E2.company
) T 
where rn BETWEEN total/2 AND total/2 + 1
# 如果是8个数字,中位数位于第(4,5)个之间【偶数个:最中间的两个数字都是中位数】
# 如果是9个数字,中位数位于第(4.5,5.5)之间【奇数个:最中间的一个数字是中位数】

5. 至少有5名直接下属的经理

  • 解析:

#方法一:子查询
select name from Employee 
where id in 
(
    select ManagerId
    from Employee
    group by ManagerID #将每个员工按对应经理分组
    having count(ManagerID)>=5 #找出管理员工数大于5的经理
)
# 方法二:自连接
select e1.name from 
Employee e1 left join Employee e2
on e1.id=e2.managerId 
#员工ID=经理ID,找出来的是所有经理员工(普通员工的ID不会等于自己经理的ID,经理员工的ID等于普通员工的ID)
#eg.给id=101的员工匹配managerID=101的人,匹配出来了5个,给id=102的员工匹配managerID=102的人,匹配出来了0个
group by e1.id #按照员工分组
having count(*)>=5 #

inner join的结果:

6. 给定数字的频率查询中位数

  • 解析

select avg(num) as median from
(
select num, 
       sum(frequency) over(order by num asc) as asc_amount, 
       #num正向排序,逐行累加frequency,正向累加到哪个数字的时候超过总数的一半
       sum(frequency) over(order by num desc) as desc_amount,
       #num反向排序,逐行累加frequency,反向累加到哪个数字的时候超过总数的一半
       sum(frequency) over() as total_num
from numbers
)a
where asc_amount >= total_num/2 and desc_amount >= total_num / 2 

子查询排序的结果为:

7. 当选者

  • 解析

#方法一:having count(#)
select C.name,V.id from #C是得票者name,V是投票者id
Candidate C left join Vote V 
on C.id=V.candidateId 
#得到的结果是{[["A", null], ["B", 4], ["B", 1], ["C", 3], ["D", 2], ["E", 5]]}
group by C.name
having count(*)>=all(select count(*) from vote group by candidateId) 
#按group by 的C.name来count(*)
# A>all(......): A大于all()中的所有数,即大于all中的最大值
#方法二:
select Name from Candidate
where id = 

(select CandidateId from Vote #找出投票表中得票数最大的id
group by CandidateId 
order by count(CandidateId) desc  #按groupby后的CandidateId来count()聚合
limit 0,1
)

8. 查询回答率最高的问题

  • 解析

select question_id survey_log from SurveyLog
group by question_id #按题目分组
order by avg(action='answer')desc,question_id #回答率,正确率等用avg()
#先按回答率排序,若回答率有相同则按question_id排序
limit 1

回答率,正确率等用avg()高效

9. 查询员工的累计薪水

SELECT Id, Month, Salary
FROM (
    SELECT Id, Month, 
           SUM(Salary) OVER (PARTITION BY Id ORDER BY Month range 2 PRECEDING) AS Salary, 
           #按id分组,组内按月份排序从当前行开始往前滚动,累加每个月的薪水
           rank() OVER (PARTITION BY Id ORDER BY Month DESC) AS r
           #按id分组,组内按月份降序排列,将序号标记为r(对大的月份的序号r=1)

    FROM Employee) t
WHERE r > 1 #筛选出除最近一个月(即最大月)之外的数据
ORDER BY Id, Month DESC;

子查询结果为(不筛选r>1):

10. 2016年成功的投保投资

  • 解析

select round(sum(TIV_2016),2) as TIV_2016 from(
    select  *,
            count(PID) over (partition by TIV_2015) num_tiv,
            #按TIV_2015分组一共有几组
            count(PID) over (partition by LAT,LON) num_city
            #按LAT,LON分组一共有几组
    from insurance) h
    where h.num_tiv>1 #TIV_2015组内个数大于1
    and h.num_city=1 #组内个数等于1

11. 订单最多的客户

`#方法一:用limit取最大值 or 用count>all()取最大值
select customer_number from Orders
group by customer_number
order by count(Order_number) desc
#having count(Order_number)>=all(
    # select count(order_number) from Orders group by customer_number)
    #每个customer_number的订单数量
limit 1
#方法二:count()=max()
select customer_number from Orders
group by customer_number
having count(order_number) = (
    select max(sum) from(
        select count(order_number) as sum from Orders group by customer_number)t
)
#方法三:窗口函数
select customer_number from(
    select customer_number,
           rank() over ( order by count(order_number) desc) as rk 
    from Orders
    group by customer_number
)t
where rk=1

#若为rank() over (partition by customer_number order by count(order_number) desc) as rk,则为在customer_number组内排序,而我们要的customer_number的组的排序,因此用group by 分组后按rank()聚合得到每个组按count(order_number)的排名

12. 好友申请1:通过率

select 
round(
    ifnull(
    count(distinct requester_id, accepter_id)/
    (select count(distinct sender_id, send_to_id) accept_rate from FriendRequest)
    ,0)
    ,2) accept_rate

from RequestAccepted

13. 好友申请2:谁有最多的好友

select id,sum(num1) as num

from (

    (select requester_id as id,count(accepter_id) as num1 from RequestAccepted R1
    group by requester_id)
    union all
    (select accepter_id as id,count(requester_id) as num1 from RequestAccepted R2
    group by accepter_id)
)t
group by id
order by num desc
limit 1

14. 连续空余座位

#找出下第一个和下第二个,再判断下第一个和下第二个是否空闲
select distinct(c1.seat_id)
from cinema c1 join cinema c2
on abs(c2.seat_id-c1.seat_id)=1 #编号连续可采用abs(A.id-B.id)=1
where c1.free=1 and c2.free=1
order by c1.seat_id

15. 销售员

#方法一:not in 子查询+左连接
select name from SalesPerson S
where sales_id not in (
    select O.sales_id from Orders O 
    left join Company C on O.com_id=C.com_id
    where C.name ='RED'
)
#方法二:全left join
select s.name, count(C.name) from SalesPerson S
left join Orders O on S.sales_id=O.sales_id
left join Company C on O.com_id=C.com_id and c.name='RED'
#符合筛选条件的才会被匹配出来
group by S.name
having count(C.name)=0#找到没有被匹配出来的
#结果是:[["John", 1], ["Amy", 0], ["Mark", 0], ["Pam", 1], ["Alex", 0]]}

16.树节点

select id ,

Case 
    When p_id is null Then "Root"
    When id  in (select distinct p_id from tree) Then "Inner" 
    #是别人的父节点的点,
    Else "Leaf"
End as Type

from tree

17. 二级关注者

#每一个关注者follower,作为被关注者followee拥有的关注者follower数量
select  followee as follower,count(distinct follower) num
#被关注者followee拥有的关注者follower数量:[["Alice", 1], ["Bob", 2], ["Donald", 1]]
from follow
where followee in (select distinct follower from follow)
#对每一个关注者follower,作为被关注者followee的身份进行筛选
group by  followee

18. 平均工资:部门与公司比较

#1.拼表,2.按日期和部门聚合计算平均金额和总金额,3.对比
select 
pay_month,
department_id,
case #3.对比
    when department_avg/company_avg>1 then 'higher'
    when department_avg/company_avg=1 then 'same' 
    else 'lower'
end as comparison
 
from(
    select #2.按日期和部门聚合计算平均金额和总金额
        date_format(pay_date,'%Y-%m') as pay_month,
        department_id,
        avg(amount) over (partition by pay_date) as company_avg, 
        avg(amount) over (partition by department_id,pay_date) as department_avg 
    
    from salary s 
    left join employee e on s.employee_id=e.employee_id #1.拼表
)t
group by department_id,pay_month
order by pay_month desc, department_id

19. 学生地理信息报告

select
    max(case when continent="America" then name else null end) as America
    #3. 按rk=1的三个(Jack,Xi,Pascal)聚合max(),case when得到(Jack,null,null)=jack
    max(case when continent="Asia" then name else null end) as Asia   
    max(case when continent="Europe" then name else null end) as Europe   
from
(
    select 
        name,
        continent,
        row_number()over (partition by continent order by name) as rk
        #按照continent分组,按照name排序为rk
    from Student
)t 
#1. t表结果是:[["Jack", "America", 1], ["Jane", "America", 2], ["Xi", "Asia", 1], ["Pascal", "Europe", 1]]
#2. rk是每个name在洲内排第几

group by rk #按照rk聚合上面的max()

#窗口函数中对continent进行分区排名 group by cur_rank 能把同一个洲的人分成不同的组 这样上面的max就能取出所有人 如果不加group by 的话 每个洲就只能取出一个人

#max()函数的功效:(‘jack’, null, null)中得出‘jack',(null,null)中得出null。 min()函数也可以。

20. 买下所有产品的客户

#问题:哪些客户买了所有产品
#思路:1. 找到每个客户购买的产品的个数,2. 找到购买个数=产品个数的客户

#方法一:利用join 筛选相同产品个数的
select c.customer_id 
from
( #找到每个客户购买的产品的个数
    select customer_id,count(distinct product_key) as cnt 
    #如果不加distinct的话,如果1客户买了两个5产品也会被算进去
    from customer
    group by customer_id
)c 
join
( #总产品个数
    select count(*) as cnt from product
)p
on c.cnt=p.cnt #找出客户购买个数=总产品个数


#方法二:having筛选
select customer_id
from customer
group by customer_id
having count(distinct product_key)=(select count(*) from product)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值