题目地址https://linklearner.com/datawhale-homepage/index.html#/learn/detail/70
工作用的mysql版本不能开窗,后面转hive了。时间不够了,先做几道打个卡!
Section A
练习一:
练习二:
练习三:
练习四:找出连续重复三次的数字
create table logss(
id VARCHAR(10) not null,
num VARCHAR(10) not null, PRIMARY key (id));
insert ignore into logss values('1','1'),('2','1'),('3','1'),('4','2'),('5','1'),('6','2'),('7','2');
select a.num from logss a
join logss b on a.id = b.id-1
join logss c on b.id = c.id-1
where a.num = b.num = c.num
练习五:识别节点属性
create table tree(
id VARCHAR(10),
p_id VARCHAR(10), PRIMARY key (id));
insert ignore into tree values('1',null),('2','1'),('3','1'),('4','2'),('5','2');
select id, case when p_id is not null and c_id is not null then 'Inner'
when p_id is not null and c_id is null then 'Leaf'
else 'Root' end as Type
from
(select b.id, b.p_id, a.c_id
from tree b left join
(select p_id, count(1) as c_id from tree group by p_id) a on a.p_id = b.id) c
练习六:找出至少有5个下属的主管
create table Employee(
id VARCHAR(10),
emp_name VARCHAR(10),
Department VARCHAR(10),
ManagerId VARCHAR(10), PRIMARY key (id));
insert ignore into Employee values('101','John','A',null),('102','Dan','A','101'),('103','James','A','101'),
('104','Amy','A','101'),('105','Anne','A','101'),('106','Ron','B','101');
-- 找出至少有5个下属的主管
select a.emp_name
from Employee a join
(select ManagerId,count(1) as emp_num from Employee group by ManagerId) b
on a.id = b.ManagerId
where b.emp_num >= '5'
练习七:找出回答率最高的问题
create table survey_log(
action VARCHAR(10),
question_id VARCHAR(10),
answer_id VARCHAR(10));
insert ignore into survey_log values('show','285',null),('answer','285','124124'),('show','369',null),('skip','369',null);
-- 找出回答率最高的问题,回答率=answer出现次数/show出现次数
select question_id
from
(select a.question_id, a.num/b.num as ratio
from
(select action, question_id, count(1) as num from survey_log where action='answer'
group by action, question_id) a
join
(select action, question_id, count(1) as num from survey_log where action='show'
group by action, question_id) b
on a.question_id = b.question_id order by ratio desc limit 1) c
select question_id from
(select question_id, sum(case when action = 'answer' then 1 else 0 end)/sum(case when action = 'show' then 1 else 0 end) as ratio
from survey_log
group by question_id order by ratio desc limit 1) a
练习八:各部门前3高工资的员工,包括开窗与不开窗的排序方法
create table employee(
id VARCHAR(10),
emp_name VARCHAR(10),
salary VARCHAR(10),
department_id VARCHAR(10));
insert ignore into employee values('1','Joe','70000','1'),('2','Henry','80000','2'),('3','Sam','60000','2'),('4','Max','90000','1'),('5','Janet','69000','1'),('6','Randy','85000','1');
-- 各部门前3高工资的员工,可以开窗 row_num() over (partition by departmen_id order by salary desc) ....
-- 使用变量 @rank
-- 顺序排名
set @rank:=0;
select salary, @rank:= @rank+1 as rank
from employee
order by salary desc
-- 并列排名(不跳过)
set @rank:=0, @a=null;
select emp_name, salary, case when @a = salary then @rank
when @a:=salary then @rank:= @rank+1 end as rank
from employee order by salary desc;
-- 并列排名(跳过)
set @rank = 0,@a=null,@z=1;
select emp_name, salary, @rank := if(@a=salary,@rank,@z) as rank,
@p:=salary, @z:=@z+1
from employee order by salary desc;
-- 分组后
-- 顺序排名
set @rank=0, @lb=null;
select department_id, emp_name, salary,
@rank:=if(@lb=department_id,@rank+1,1) as rank, @lb:=department_id as lb
from employee order by department_id,salary desc
-- 并列排名(不跳过)
set @rank=0, @lb=null, @a=null;
select department_id, emp_name, salary,
@rank:=if(@lb=department_id,if(@a=salary,@rank,@rank+1),1) as rank,
@lb:=department_id, @a:=salary
from employee order by department_id,salary desc
-- 并列排名(跳过)--注意变量更新的顺序
set @rank=0, @lb=null, @a=null,@z=1;
select department_id, emp_name, salary,
@z:=if(@lb=department_id,@z+1,1),
@rank:= if(@lb=department_id,if(@a=salary,@rank,@z),1) as rank,
@lb:=department_id, @a:=salary
from employee order by department_id,salary desc
-- 筛选各组前两名
set @rank=0, @lb=null, @a=null,@z=1;
select * from
(select department_id, emp_name, salary,
@z:=if(@lb=department_id,@z+1,1),
@rank:= if(@lb=department_id,if(@a=salary,@rank,@z),1) as rank,
@lb:=department_id, @a:=salary
from employee order by department_id,salary desc) a
where a.rank<=3
-- 若不考虑吗名次,则可用关联子查询:
select * from employee a
where (select count(1) from employee b
where a.department_id = b.department_id and a.salary<b.salary) <= 2
order by department_id, salary desc